git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4848 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
f1ori 2008-05-24 16:12:16 +00:00
parent 25192e0d36
commit fd8bd5d0d1
3 changed files with 8 additions and 7 deletions

View File

@@ -106,7 +106,7 @@ public class get {
while (it.hasNext()) {
tag = it.next();
if(!tag.getTagName().startsWith("/")) { // ignore folder tags
prop.put("tags_"+count+"_name", tag.getTagName());
prop.putHTML("tags_"+count+"_name", tag.getTagName(), true);
prop.put("tags_"+count+"_count", tag.size());
count++;
}

View File

@@ -74,7 +74,7 @@ public class getpageinfo_p {
String url=(String) post.get("url");
if(url.toLowerCase().startsWith("ftp://")){
prop.put("robots-allowed", "1");
prop.putHTML("title", "FTP: "+url);
prop.putHTML("title", "FTP: "+url, true);
return prop;
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
url = "http://" + url;
@@ -93,7 +93,7 @@ public class getpageinfo_p {
writer.close();
// put the document title
prop.putHTML("title", scraper.getTitle());
prop.putHTML("title", scraper.getTitle(), true);
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@@ -108,7 +108,7 @@ public class getpageinfo_p {
i++;
tag += " "+list[i];
}
prop.putHTML("tags_"+count+"_tag", tag);
prop.putHTML("tags_"+count+"_tag", tag, true);
count++;
}
}
@@ -127,7 +127,7 @@ public class getpageinfo_p {
// get the sitemap URL of the domain
yacyURL sitemapURL = robotsParser.getSitemapURL(theURL);
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true);
} catch (MalformedURLException e) {}
}

View File

@@ -61,6 +61,7 @@ import java.util.Properties;
import javax.swing.event.EventListenerList;
import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
@@ -180,11 +181,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
} else {
name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), tagopts.getProperty("content",""));
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
}
}
}