mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
* fix for http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1176 (encoding issue)
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4848 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
25192e0d36
commit
fd8bd5d0d1
|
@ -106,7 +106,7 @@ public class get {
|
|||
while (it.hasNext()) {
|
||||
tag = it.next();
|
||||
if(!tag.getTagName().startsWith("/")) { // ignore folder tags
|
||||
prop.put("tags_"+count+"_name", tag.getTagName());
|
||||
prop.putHTML("tags_"+count+"_name", tag.getTagName(), true);
|
||||
prop.put("tags_"+count+"_count", tag.size());
|
||||
count++;
|
||||
}
|
||||
|
|
|
@ -74,7 +74,7 @@ public class getpageinfo_p {
|
|||
String url=(String) post.get("url");
|
||||
if(url.toLowerCase().startsWith("ftp://")){
|
||||
prop.put("robots-allowed", "1");
|
||||
prop.putHTML("title", "FTP: "+url);
|
||||
prop.putHTML("title", "FTP: "+url, true);
|
||||
return prop;
|
||||
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
|
||||
url = "http://" + url;
|
||||
|
@ -93,7 +93,7 @@ public class getpageinfo_p {
|
|||
writer.close();
|
||||
|
||||
// put the document title
|
||||
prop.putHTML("title", scraper.getTitle());
|
||||
prop.putHTML("title", scraper.getTitle(), true);
|
||||
|
||||
// put the favicon that belongs to the document
|
||||
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
|
||||
|
@ -108,7 +108,7 @@ public class getpageinfo_p {
|
|||
i++;
|
||||
tag += " "+list[i];
|
||||
}
|
||||
prop.putHTML("tags_"+count+"_tag", tag);
|
||||
prop.putHTML("tags_"+count+"_tag", tag, true);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
@ -127,7 +127,7 @@ public class getpageinfo_p {
|
|||
|
||||
// get the sitemap URL of the domain
|
||||
yacyURL sitemapURL = robotsParser.getSitemapURL(theURL);
|
||||
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
|
||||
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true);
|
||||
} catch (MalformedURLException e) {}
|
||||
}
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ import java.util.Properties;
|
|||
|
||||
import javax.swing.event.EventListenerList;
|
||||
|
||||
import de.anomic.data.htmlTools;
|
||||
import de.anomic.http.HttpClient;
|
||||
import de.anomic.server.serverCharBuffer;
|
||||
import de.anomic.server.serverFileUtils;
|
||||
|
@ -180,11 +181,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
|
|||
if (tagname.equalsIgnoreCase("meta")) {
|
||||
String name = tagopts.getProperty("name", "");
|
||||
if (name.length() > 0) {
|
||||
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
|
||||
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
|
||||
} else {
|
||||
name = tagopts.getProperty("http-equiv", "");
|
||||
if (name.length() > 0) {
|
||||
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
|
||||
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user