FULL redesign of algorithms in htmlTools to encode/decode strings from/to unicode and html.

The old process used an inefficient way to detect HTML-encoded strings in text.
All calling methods have been adapted to call the new class in an enhanced way with fewer parameters.

Many classes in the interfaces used XML encoding only (instead of a full conversion from Unicode to HTML); this behavior was not changed by this commit but should be reviewed again, since it points to possible XSS leaks.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5295 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-10-22 18:59:04 +00:00
parent 958ec20cd0
commit 0edec2b760
30 changed files with 232 additions and 233 deletions

View File

@ -319,7 +319,7 @@ prop.putHTML("asd", "0");
while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername);
prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_hash", Hash);
prop.putHTML(DISABLED + "otherHosts_" + peerCount + "_name", peername, true);
prop.putXML(DISABLED + "otherHosts_" + peerCount + "_name", peername);
hostList.remove(peername);
peerCount++;
}
@ -332,14 +332,14 @@ prop.putHTML("asd", "0");
int blacklistCount = 0;
if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) {
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i], true);
prop.putXML(DISABLED + BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "0");
if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.putHTML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes], true);
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
listManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i]) ? "0" : "1");
}
@ -366,7 +366,7 @@ prop.putHTML("asd", "0");
}
prop.put(DISABLED + "blackLists", blacklistCount);
prop.putHTML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse, true);
prop.putXML(DISABLED + "currentBlacklist", (blacklistToUse==null) ? "" : blacklistToUse);
prop.put("disabled", (blacklistToUse == null) ? "1" : "0");
return prop;
}

View File

@ -180,9 +180,9 @@ public class Blog {
try {
prop.put("mode", "1"); //edit
prop.put("mode_commentMode", page.getCommentMode());
prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"), xml);
prop.putHTML("mode_author", new String(page.getAuthor(),"UTF-8"));
prop.put("mode_pageid", page.getKey());
prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"), xml);
prop.putHTML("mode_subject", new String(page.getSubject(), "UTF-8"));
prop.put("mode_page-code", new String(page.getPage(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {}
}
@ -195,16 +195,16 @@ public class Blog {
if(hasRights) {
prop.put("mode", "2");//preview
prop.put("mode_commentMode", post.getInt("commentMode", 1));
prop.putHTML("mode_pageid", pagename, xml);
prop.putHTML("mode_pageid", pagename);
try {
prop.putHTML("mode_author", new String(author, "UTF-8"), xml);
prop.putHTML("mode_author", new String(author, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_author", new String(author), xml);
prop.putHTML("mode_author", new String(author));
}
prop.putHTML("mode_subject", post.get("subject",""), xml);
prop.putHTML("mode_subject", post.get("subject",""));
prop.put("mode_date", dateString(new Date()));
prop.putWiki("mode_page", post.get("content", ""));
prop.putHTML("mode_page-code", post.get("content", ""), xml);
prop.putHTML("mode_page-code", post.get("content", ""));
}
else {
prop.put("mode", "3"); //access denied (no rights)
@ -213,16 +213,16 @@ public class Blog {
else if(post.get("delete", "").equals("try")) {
if(hasRights) {
prop.put("mode", "4");
prop.putHTML("mode_pageid", pagename, xml);
prop.putHTML("mode_pageid", pagename);
try {
prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"), xml);
prop.putHTML("mode_author",new String(page.getAuthor(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_author",new String(page.getAuthor()), xml);
prop.putHTML("mode_author",new String(page.getAuthor()));
}
try {
prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"), xml);
prop.putHTML("mode_subject",new String(page.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_subject",new String(page.getSubject()), xml);
prop.putHTML("mode_subject",new String(page.getSubject()));
}
}
else prop.put("mode", "3"); //access denied (no rights)
@ -246,7 +246,7 @@ public class Blog {
if(pagename.equals(DEFAULT_PAGE)) {
// XXX: where are "peername" and "address" used in the template?
// XXX: "clientname" is already set to the peername, no need for a new setting
prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName(), xml);
prop.putHTML("peername", sb.webIndex.seedDB.mySeed().getName());
prop.put("address", address);
//index all entries
putBlogDefault(prop, sb, address, start, num, hasRights, xml);
@ -321,16 +321,16 @@ public class Blog {
{
// subject
try {
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"), xml);
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()), xml);
prop.putHTML("mode_entries_" + number + "_subject", new String(entry.getSubject()));
}
// author
try {
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"), xml);
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor(),"UTF-8"));
} catch (final UnsupportedEncodingException e) {
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()), xml);
prop.putHTML("mode_entries_" + number + "_author", new String(entry.getAuthor()));
}
// comments

View File

@ -212,7 +212,7 @@ public class CrawlProfileEditor_p {
prop.put("crawlProfiles_" + count + "_dark", dark ? "1" : "0");
prop.put("crawlProfiles_" + count + "_status", active ? "1" : "0");
prop.put("crawlProfiles_" + count + "_name", profile.name());
prop.putHTML("crawlProfiles_" + count + "_startURL", profile.startURL(), true);
prop.putXML("crawlProfiles_" + count + "_startURL", profile.startURL());
prop.put("crawlProfiles_" + count + "_handle", profile.handle());
prop.put("crawlProfiles_" + count + "_depth", profile.generalDepth());
prop.put("crawlProfiles_" + count + "_filter", profile.generalFilter());

View File

@ -81,7 +81,7 @@ public class MessageSend_p {
peerName = targetPeer.get(yacySeed.NAME,"nameless");
}
prop.putHTML("mode_permission_peerName", peerName, true);
prop.putXML("mode_permission_peerName", peerName);
final String response = (result == null) ? null : (String) result.get("response");
if (response == null || result == null) {
// we don't have permission or other peer does not exist
@ -98,11 +98,11 @@ public class MessageSend_p {
final int messagesize = Integer.parseInt(result.get("messagesize"));
final int attachmentsize = Integer.parseInt(result.get("attachmentsize"));
prop.putHTML("mode_permission_response", response, true);
prop.putXML("mode_permission_response", response);
prop.put("mode_permission_messagesize", messagesize);
prop.put("mode_permission_attachmentsize", attachmentsize);
prop.putHTML("mode_permission_subject", subject, true);
prop.putHTML("mode_permission_message", message, true);
prop.putXML("mode_permission_subject", subject);
prop.putXML("mode_permission_message", message);
prop.putHTML("mode_permission_hash", hash);
if (post.containsKey("preview")) {
prop.putWiki("mode_permission_previewmessage", message);
@ -140,7 +140,7 @@ public class MessageSend_p {
prop.put("mode_status", "1");
// "unresolved pattern", the remote peer is alive but had an exception
prop.putHTML("mode_status_message", message, true);
prop.putXML("mode_status_message", message);
}
}
return prop;

View File

@ -58,7 +58,7 @@ public class Messages_p {
final String peerAddress = sb.webIndex.seedDB.mySeed().getPublicAddress();
final String peerName = sb.webIndex.seedDB.mySeed().getName();
prop.put("peerAddress", peerAddress);
prop.putHTML("peerName", peerName, true);
prop.putXML("peerName", peerName);
// List known hosts for message sending (from Blacklist_p.java)
if (sb.webIndex.seedDB != null && sb.webIndex.seedDB.sizeConnected() > 0) {
@ -76,7 +76,7 @@ public class Messages_p {
while ((peername = hostList.firstKey()) != null) {
final String Hash = hostList.get(peername);
prop.put(PEERSKNOWN + "peers_" + peerCount + "_hash", Hash);
prop.putHTML(PEERSKNOWN + "peers_" + peerCount + "_name", peername, true);
prop.putXML(PEERSKNOWN + "peers_" + peerCount + "_name", peername);
hostList.remove(peername);
peerCount++;
}
@ -119,11 +119,11 @@ public class Messages_p {
message = sb.messageDB.read(key);
prop.put("mode_messages_"+count+"_dark", ((dark) ? "1" : "0") );
prop.put("mode_messages_"+count+"_date", dateString(message.date()));
prop.putHTML("mode_messages_"+count+"_from", message.author(), true);
prop.putHTML("mode_messages_"+count+"_to", message.recipient(), true);
prop.putHTML("mode_messages_"+count+"_subject", message.subject(), true);
prop.putHTML("mode_messages_"+count+"_category", message.category(), true);
prop.putHTML("mode_messages_"+count+"_key", key, true);
prop.putXML("mode_messages_"+count+"_from", message.author());
prop.putXML("mode_messages_"+count+"_to", message.recipient());
prop.putXML("mode_messages_"+count+"_subject", message.subject());
prop.putXML("mode_messages_"+count+"_category", message.category());
prop.putXML("mode_messages_"+count+"_key", key);
prop.put("mode_messages_"+count+"_hash", message.authorHash());
if ((header.get(httpRequestHeader.CONNECTION_PROP_PATH)).endsWith(".rss")) {
@ -135,7 +135,7 @@ public class Messages_p {
// also write out the message body (needed for the RSS feed)
try {
prop.putHTML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"), true);
prop.putXML("mode_messages_"+count+"_body",new String(message.message(), "UTF-8"));
} catch (final UnsupportedEncodingException e) {
// can not happen, because UTF-8 must be supported by every JVM
}
@ -157,10 +157,10 @@ public class Messages_p {
message = sb.messageDB.read(key);
if (message == null) throw new NullPointerException("Message with ID " + key + " does not exist");
prop.putHTML("mode_from", message.author(), true);
prop.putHTML("mode_to", message.recipient(), true);
prop.putXML("mode_from", message.author());
prop.putXML("mode_to", message.recipient());
prop.put("mode_date", dateString(message.date()));
prop.putHTML("mode_subject", message.subject(), true);
prop.putXML("mode_subject", message.subject());
String theMessage = null;
try {
theMessage = new String(message.message(), "UTF-8");
@ -169,7 +169,7 @@ public class Messages_p {
}
prop.putWiki("mode_message", theMessage);
prop.put("mode_hash", message.authorHash());
prop.putHTML("mode_key", key, true);
prop.putXML("mode_key", key);
}
// return rewrite properties

View File

@ -111,14 +111,14 @@ public class PerformanceQueues_p {
// set values to templates
prop.put("table_" + c + "_threadname", threadName);
prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription(), xml);
prop.putHTML("table_" + c + "_hasurl_shortdescr", thread.getShortDescription());
if(thread.getMonitorURL() == null) {
prop.put("table_"+c+"_hasurl", "0");
}else{
prop.put("table_"+c+"_hasurl", "1");
prop.put("table_" + c + "_hasurl_url", thread.getMonitorURL());
}
prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription(), xml);
prop.putHTML("table_" + c + "_longdescr", thread.getLongDescription());
queuesize = thread.getJobCount();
prop.put("table_" + c + "_queuesize", (queuesize == Integer.MAX_VALUE) ? "unlimited" : yFormatter.number(queuesize, !xml));

View File

@ -162,7 +162,7 @@ public final class Settings_p {
}
// clientIP
prop.putHTML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, "<unknown>"), true); // read an artificial header addendum
prop.putXML("clientIP", (String) header.get(httpRequestHeader.CONNECTION_PROP_CLIENTIP, "<unknown>")); // read an artificial header addendum
/*
* seed upload settings
@ -239,7 +239,7 @@ public final class Settings_p {
while (availableParserIter.hasNext()) {
final ParserInfo parserInfo = availableParserIter.next();
prop.put("parser_" + parserIdx + "_name", parserInfo.parserName);
prop.putHTML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr, true);
prop.putXML("parser_" + parserIdx + "_version", parserInfo.parserVersionNr);
prop.put("parser_" + parserIdx + "_usage", parserInfo.usageCount);
prop.put("parser_" + parserIdx + "_colspan", configArray.length);

View File

@ -170,8 +170,8 @@ public class Status {
if (sb.getConfig("remoteProxyUse", "false").equals("true")) {
prop.put("remoteProxy", "1");
prop.putHTML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"), true);
prop.putHTML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"), true);
prop.putXML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"));
prop.putXML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"));
prop.put("remoteProxy_4Yacy", sb.getConfig("remoteProxyUse4Yacy", "true").equalsIgnoreCase("true") ? "0" : "1");
} else {
prop.put("remoteProxy", "0"); // not used
@ -201,7 +201,7 @@ public class Status {
} else {
prop.put("peerAddress", "1"); // Address
prop.put("peerAddress_address", sb.webIndex.seedDB.mySeed().getPublicAddress());
prop.putHTML("peerAddress_peername", sb.getConfig("peerName", "<nameless>").toLowerCase(), true);
prop.putXML("peerAddress_peername", sb.getConfig("peerName", "<nameless>").toLowerCase());
}
}
final String peerStatus = ((sb.webIndex.seedDB.mySeed() == null) ? yacySeed.PEERTYPE_VIRGIN : sb.webIndex.seedDB.mySeed().get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN));

View File

@ -153,11 +153,11 @@ public class Surftips {
prop.put("surftips_results_" + i + "_authorized_recommend_display", display);
prop.put("surftips_results_" + i + "_authorized_recommend_showScore", (showScore ? "1" : "0"));
prop.putHTML("surftips_results_" + i + "_authorized_urlhash", urlhash, true);
prop.putHTML("surftips_results_" + i + "_url", url, true);
prop.putHTML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60), true);
prop.putHTML("surftips_results_" + i + "_urlhash", urlhash, true);
prop.putHTML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title, true);
prop.putXML("surftips_results_" + i + "_authorized_urlhash", urlhash);
prop.putXML("surftips_results_" + i + "_url", url);
prop.putXML("surftips_results_" + i + "_urlname", nxTools.shortenURLString(url, 60));
prop.putXML("surftips_results_" + i + "_urlhash", urlhash);
prop.putXML("surftips_results_" + i + "_title", (showScore) ? ("(" + ranking.getScore(urlhash) + ") " + title) : title);
prop.putHTML("surftips_results_" + i + "_description", description);
i++;

View File

@ -35,7 +35,7 @@ import java.util.Date;
import java.util.Map;
import java.util.Map.Entry;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
@ -122,9 +122,9 @@ public class Threaddump_p {
line = null;
}
if ((line != null) && (line.length() > 0)) {
bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true) + " [" + line.trim() + "]");
bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true) + " [" + line.trim() + "]");
} else {
bufferappend(buffer, plain, tracename + "at " + htmlTools.encodeUnicode2html(ste.toString(), true));
bufferappend(buffer, plain, tracename + "at " + htmlFilterCharacterCoding.unicode2html(ste.toString(), true));
}
}
bufferappend(buffer, plain, "");

View File

@ -33,8 +33,8 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
@ -352,7 +352,7 @@ public class ViewFile {
}
private static final String markup(final String[] wordArray, String message) {
message = htmlTools.encodeUnicode2html(message, true);
message = htmlFilterCharacterCoding.unicode2html(message, true);
if (wordArray != null)
for (int j = 0; j < wordArray.length; j++) {
final String currentWord = wordArray[j].trim();

View File

@ -43,9 +43,9 @@ public class opensearchdescription {
if (thisaddress.indexOf(":") == -1) thisaddress += ":" + serverCore.getPortNr(env.getConfig("port", "8080"));
final serverObjects prop = new serverObjects();
prop.putHTML("thisaddress", thisaddress, true);
prop.putHTML("SearchPageGreeting", promoteSearchPageGreeting, true);
prop.putHTML("clientname", sb.webIndex.seedDB.mySeed().getName(), true);
prop.putXML("thisaddress", thisaddress);
prop.putXML("SearchPageGreeting", promoteSearchPageGreeting);
prop.putXML("clientname", sb.webIndex.seedDB.mySeed().getName());
// return rewrite properties
return prop;

View File

@ -59,13 +59,13 @@ public class all {
Date date;
while(it.hasNext()){
bookmark=switchboard.bookmarksDB.getBookmark(it.next());
prop.putHTML("posts_"+count+"_url", bookmark.getUrl(), true);
prop.putHTML("posts_"+count+"_title", bookmark.getTitle(), true);
prop.putHTML("posts_"+count+"_description", bookmark.getDescription(), true);
prop.putHTML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()), true);
prop.putXML("posts_"+count+"_url", bookmark.getUrl());
prop.putXML("posts_"+count+"_title", bookmark.getTitle());
prop.putXML("posts_"+count+"_description", bookmark.getDescription());
prop.putXML("posts_"+count+"_md5", serverCodings.encodeMD5Hex(bookmark.getUrl()));
date=new Date(bookmark.getTimeStamp());
prop.putHTML("posts_"+count+"_time", serverDate.formatISO8601(date), true);
prop.putHTML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "), true);
prop.putXML("posts_"+count+"_time", serverDate.formatISO8601(date));
prop.putXML("posts_"+count+"_tags", bookmark.getTagsString().replaceAll(","," "));
// additional XML tags
prop.put("posts_"+count+"_isExtended",extendedXML ? "1" : "0");

View File

@ -88,7 +88,7 @@ public class get {
while (it.hasNext()) {
tag = it.next();
if(!tag.getTagName().startsWith("/")) { // ignore folder tags
prop.putHTML("tags_"+count+"_name", tag.getTagName(), true);
prop.putXML("tags_"+count+"_name", tag.getTagName());
prop.put("tags_"+count+"_count", tag.size());
count++;
}

View File

@ -4,7 +4,7 @@ import java.util.Date;
import java.util.Iterator;
import de.anomic.data.bookmarksDB;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate;
@ -83,7 +83,7 @@ public class xbel {
count++;
final String title = fn; // just to make sure fn stays untouched
prop.put("xbel_"+count+"_elements", "<title>" + htmlTools.encodeUnicode2xml(title.replaceAll("(/.[^/]*)*/", "")) + "</title>");
prop.put("xbel_"+count+"_elements", "<title>" + htmlFilterCharacterCoding.unicode2xml(title.replaceAll("(/.[^/]*)*/", ""), true) + "</title>");
count++;
final Iterator<String> bit=switchboard.bookmarksDB.getBookmarksIterator(fn, isAdmin);
count = print_XBEL(bit, count);
@ -106,19 +106,19 @@ public class xbel {
bookmark=switchboard.bookmarksDB.getBookmark(bit.next());
date=new Date(bookmark.getTimeStamp());
prop.put("xbel_"+count+"_elements", "<bookmark id=\"" + bookmark.getUrlHash()
+ "\" href=\"" + htmlTools.encodeUnicode2xml(bookmark.getUrl())
+ "\" added=\"" + htmlTools.encodeUnicode2xml(serverDate.formatISO8601(date))+"\">");
+ "\" href=\"" + htmlFilterCharacterCoding.unicode2xml(bookmark.getUrl(), true)
+ "\" added=\"" + htmlFilterCharacterCoding.unicode2xml(serverDate.formatISO8601(date), true)+"\">");
count++;
prop.put("xbel_"+count+"_elements", "<title>");
count++;
prop.putHTML("xbel_"+count+"_elements", bookmark.getTitle(), true);
prop.putXML("xbel_"+count+"_elements", bookmark.getTitle());
count++;
prop.put("xbel_"+count+"_elements", "</title>");
count++;
prop.put("xbel_"+count+"_elements", "<info>");
count++;
prop.put("xbel_"+count+"_elements", "<metadata owner=\"Mozilla\" ShortcutURL=\""
+ htmlTools.encodeUnicode2xml(bookmark.getTagsString().replaceAll("/.*,", "").toLowerCase())
+ htmlFilterCharacterCoding.unicode2xml(bookmark.getTagsString().replaceAll("/.*,", "").toLowerCase(), true)
+ "\"/>");
count++;
prop.put("xbel_"+count+"_elements", "<metadata owner=\"YaCy\" public=\""+Boolean.toString(bookmark.getPublic())+"\"/>");
@ -127,7 +127,7 @@ public class xbel {
count++;
prop.put("xbel_"+count+"_elements", "<desc>");
count++;
prop.putHTML("xbel_"+count+"_elements", bookmark.getDescription(), true);
prop.putXML("xbel_"+count+"_elements", bookmark.getDescription());
count++;
prop.put("xbel_"+count+"_elements", "</desc>");
count++;

View File

@ -66,8 +66,8 @@ public class feed {
RSSMessage message = feed.getChannel();
if (message != null) {
prop.putHTML("channel_title", message.getTitle(), true);
prop.putHTML("channel_description", message.getDescription(), true);
prop.putXML("channel_title", message.getTitle());
prop.putXML("channel_description", message.getDescription());
prop.put("channel_pubDate", message.getPubDate());
}
while ((messageMaxCount > 0) && (feed.size() > 0)) {
@ -75,9 +75,9 @@ public class feed {
if (message == null) continue;
// create RSS entry
prop.putHTML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle(), true);
prop.putHTML("item_" + messageCount + "_description", message.getDescription(), true);
prop.putHTML("item_" + messageCount + "_link", message.getLink(), true);
prop.putXML("item_" + messageCount + "_title", channels[channelIndex] + ": " + message.getTitle());
prop.putXML("item_" + messageCount + "_description", message.getDescription());
prop.putXML("item_" + messageCount + "_link", message.getLink());
prop.put("item_" + messageCount + "_pubDate", message.getPubDate());
prop.put("item_" + messageCount + "_guid", message.getGuid());
messageCount++;

View File

@ -110,8 +110,8 @@ public class queues_p {
prop.putHTML("list-indexing_"+i+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-indexing_"+i+"_depth", pcentry.depth());
prop.put("list-indexing_"+i+"_modified", pcentry.getModificationDate());
prop.putHTML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName(), true);
prop.putHTML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true), true);
prop.putXML("list-indexing_"+i+"_anchor", (pcentry.anchorName()==null) ? "" : pcentry.anchorName());
prop.putXML("list-indexing_"+i+"_url", pcentry.url().toNormalform(false, true));
prop.putNum("list-indexing_"+i+"_size", entrySize);
prop.put("list-indexing_"+i+"_inProcess", (inProcess) ? "1" : "0");
prop.put("list-indexing_"+i+"_hash", pcentry.urlHash());
@ -135,7 +135,7 @@ public class queues_p {
initiator = sb.webIndex.seedDB.getConnected(w[i].initiator());
prop.putHTML("list-loader_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("list-loader_"+count+"_depth", w[i].depth());
prop.putHTML("list-loader_"+count+"_url", w[i].url().toString(), true);
prop.putXML("list-loader_"+count+"_url", w[i].url().toString());
count++;
}
prop.put("list-loader", count);
@ -181,8 +181,8 @@ public class queues_p {
prop.put(tableName + "_" + showNum + "_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put(tableName + "_" + showNum + "_depth", urle.depth());
prop.put(tableName + "_" + showNum + "_modified", daydate(urle.loaddate()));
prop.putHTML(tableName + "_" + showNum + "_anchor", urle.name(), true);
prop.putHTML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true), true);
prop.putXML(tableName + "_" + showNum + "_anchor", urle.name());
prop.putXML(tableName + "_" + showNum + "_url", urle.url().toNormalform(false, true));
prop.put(tableName + "_" + showNum + "_hash", urle.url().hash());
showNum++;
}

View File

@ -65,7 +65,7 @@ public class getpageinfo_p {
String url=post.get("url");
if(url.toLowerCase().startsWith("ftp://")){
prop.put("robots-allowed", "1");
prop.putHTML("title", "FTP: "+url, true);
prop.putXML("title", "FTP: "+url);
return prop;
} else if (!(url.toLowerCase().startsWith("http://") || url.toLowerCase().startsWith("https://"))) {
url = "http://" + url;
@ -86,7 +86,7 @@ public class getpageinfo_p {
writer.close();
// put the document title
prop.putHTML("title", scraper.getTitle(), true);
prop.putXML("title", scraper.getTitle());
// put the favicon that belongs to the document
prop.put("favicon", (scraper.getFavicon()==null) ? "" : scraper.getFavicon().toString());
@ -97,16 +97,16 @@ public class getpageinfo_p {
for(int i=0;i<list.length;i++){
String tag = list[i];
if (!tag.equals("")) {
prop.putHTML("tags_"+count+"_tag", tag, true);
prop.putXML("tags_"+count+"_tag", tag);
count++;
}
}
prop.put("tags", count);
// put description
prop.putHTML("desc", scraper.getDescription(), true);
prop.putXML("desc", scraper.getDescription());
// put language
Set<String> languages = scraper.getContentLanguages();
prop.putHTML("lang", (languages == null) ? "unknown" : languages.iterator().next(), true);
prop.putXML("lang", (languages == null) ? "unknown" : languages.iterator().next());
} catch (final MalformedURLException e) { /* ignore this */
} catch (final IOException e) { /* ignore this */
@ -121,7 +121,7 @@ public class getpageinfo_p {
// get the sitemap URL of the domain
final yacyURL sitemapURL = sb.robots.getSitemapURL(theURL);
prop.putHTML("sitemap", (sitemapURL==null)?"":sitemapURL.toString(), true);
prop.putXML("sitemap", (sitemapURL==null)?"":sitemapURL.toString());
} catch (final MalformedURLException e) {}
}

View File

@ -339,7 +339,7 @@ public class ysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements
prop.putHTML("rss_query", querystring, true);
prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis();

View File

@ -96,9 +96,9 @@ public class ysearchitem {
if (rss) {
// text search for rss output
prop.put("rss", "1"); // switch on specific content
prop.putHTML("rss_title", result.title(), true);
prop.putHTML("rss_description", result.textSnippet().getLineRaw(), true);
prop.putHTML("rss_link", result.urlstring(), true);
prop.putXML("rss_title", result.title());
prop.putXML("rss_description", result.textSnippet().getLineRaw());
prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop;

View File

@ -439,7 +439,7 @@ public class yacysearch {
prop.put("input_contentdomCheckApp", (contentdomCode == plasmaSearchQuery.CONTENTDOM_APP) ? "1" : "0");
// for RSS: don't HTML encode some elements
prop.putHTML("rss_query", querystring, true);
prop.putXML("rss_query", querystring);
prop.put("rss_queryenc", yacyURL.escape(querystring.replace(' ', '+')));
sb.localSearchLastAccess = System.currentTimeMillis();

View File

@ -182,10 +182,10 @@ public class yacysearchitem {
if (rss) {
// text search for rss output
prop.put("rss", "1"); // switch on specific content
prop.putHTML("rss_title", result.title(), true);
prop.putXML("rss_title", result.title());
final plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
prop.putHTML("rss_description", (snippet == null) ? "" : snippet.getLineRaw(), true);
prop.putHTML("rss_link", result.urlstring(), true);
prop.putXML("rss_description", (snippet == null) ? "" : snippet.getLineRaw());
prop.putXML("rss_link", result.urlstring());
prop.put("rss_urlhash", result.hash());
prop.put("rss_date", plasmaSwitchboard.dateString822(result.modified()));
return prop;

View File

@ -30,6 +30,8 @@ package de.anomic.data;
import java.util.ArrayList;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
/**
* This class provides a diff-functionality.
*/
@ -253,7 +255,7 @@ public class diff {
case diff.Part.ADDED: sb.append("added"); break;
case diff.Part.DELETED: sb.append("deleted"); break;
}
sb.append("\">").append(htmlTools.encodeUnicode2html(ps[j].getString(), true).replaceAll("\n", "<br />"));
sb.append("\">").append(htmlFilterCharacterCoding.unicode2html(ps[j].getString(), true).replaceAll("\n", "<br />"));
sb.append("</span>");
}
sb.append("</p>");

View File

@ -35,6 +35,7 @@ import java.util.HashMap;
import de.anomic.data.wiki.abstractWikiParser;
import de.anomic.data.wiki.wikiParser;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
@ -757,7 +758,7 @@ public class wikiCode extends abstractWikiParser implements wikiParser {
public String transformLine(String result, final String publicAddress, final plasmaSwitchboard switchboard) {
//If HTML has not bee replaced yet (can happen if method gets called in recursion), replace now!
if (!replacedHTML || preformattedSpan){
result = htmlTools.encodeUnicode2html(result, true);
result = htmlFilterCharacterCoding.unicode2html(result, true);
replacedHTML = true;
}

View File

@ -1,13 +1,8 @@
// htmlTools.java
// -----------------------
// (C) by Michael Peter Christen; mc@yacy.net,
// (C) by Jan Sandbrink (NN), Franz Brausse (FB, karlchenofhell),
// (C) by Bjoern 'fuchs' Krombholz (fuchsi)
// first published on http://www.yacy.net
//
// $LastChangedDate: $
// $LastChangedRevision: $
// $LastChangedBy: $
// htmlFilterCharacterCoding.java
// ----------------------------------
// (C) 22.10.2008 by Michael Peter Christen; mc@yacy.net
// first published on http://yacy.net
// Frankfurt, Germany, 2008
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -23,114 +18,22 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data;
package de.anomic.htmlFilter;
public class htmlTools {
import java.util.HashMap;
/** Replaces characters in a string with other entities according to HTML standards.
* @param text a string that possibly contains special characters
* @param includingAmpersand if <code>false</code> ampersands are not encoded
* @param forXML if <code>true</code> then only &amp;, &quot;, &lt; and &gt; will
* be transcoded.
* @return the string with all characters replaced by the corresponding character from array
*/
public static String encodeUnicode2html(final String text, final boolean includingAmpersand, final boolean forXML) {
if (text == null)
return null;
final int spos = (includingAmpersand ? 0 : 2);
// if (forXML), then only encode ampersand, quotation mark, less than and
// greather than which are the first 4 pairs in default mapping table
final int epos = (forXML ? 8 : mapping.length);
public class htmlFilterCharacterCoding {
return encode(text, mapping, spos, epos);
}
private static final char amp_unicode = "\u0026".charAt(0);
private static final String amp_html = "&amp;";
/**
 * Encodes special characters of a string as HTML entities using the complete mapping table.
 *
 * Like {@link #encodeUnicode2html(String, boolean, boolean)} with <code>forXML = false</code>.
 *
 * @param text a string that possibly contains special characters; <code>null</code> yields
 *        <code>null</code> (handled by the three-argument overload)
 * @param includingAmpersand if <code>false</code> ampersands are not encoded
 * @return the encoded string as produced by the three-argument overload
 */
public static String encodeUnicode2html(final String text, final boolean includingAmpersand) {
return encodeUnicode2html(text, includingAmpersand, false);
}
/**
 * Replaces the special characters ampersand, quotation mark, less than and greater than
 * by the escaping entities allowed in XML documents.
 *
 * Like {@link #encodeUnicode2html(String, boolean, boolean)} with
 * <code>includingAmpersand = true</code> and <code>forXML = true</code>.
 *
 * @param text the original String; <code>null</code> yields <code>null</code>
 *        (handled by the three-argument overload)
 * @return the encoded String
 */
public static String encodeUnicode2xml(final String text) {
return encodeUnicode2html(text, true, true);
}
/**
 * Replaces every character of <code>text</code> that has an entry in the given slice of
 * a mapping table by the replacement string stored next to it.
 *
 * The table is a flat list of pairs: the element at an even index holds the character to
 * look for (only its first character is compared), the following element holds the
 * replacement. The first matching pair inside the slice wins.
 *
 * @param text The String to process; must not be <code>null</code>.
 * @param map  The flat pair table described above.
 * @param spos Index of the first pair of the slice to use (inclusive).
 * @param epos End of the slice (exclusive).
 * @return A copy of <code>text</code> with all mapped characters replaced.
 */
public static String encode(final String text, final String[] map, final int spos, final int epos) {
    final int length = text.length();
    final StringBuilder out = new StringBuilder(length);
    for (int pos = 0; pos < length; pos++) {
        final char current = text.charAt(pos);
        boolean replaced = false;
        // walk the selected pairs until the first one matches
        for (int i = spos; i < epos && !replaced; i += 2) {
            if (current == map[i].charAt(0)) {
                out.append(map[i + 1]);
                replaced = true;
            }
        }
        if (!replaced) {
            // no pair matched: keep the character as it is
            out.append(current);
        }
    }
    return out.toString();
}
private static final String[] mapping4xml = {
"\"","&quot;", //quotation mark
"\u003C","&lt;", //less than
"\u003E","&gt;", //greater than
};
/**
 * Replaces every entity listed in the <code>mapping</code> table by the character
 * stored in front of it. The table is scanned linearly at each text position and the
 * first entity that matches there wins; text that matches no entity is copied unchanged.
 *
 * @param text the string to decode, may be <code>null</code>
 * @return the decoded string, or <code>null</code> if <code>text</code> was <code>null</code>
 */
public static String decodeHtml2Unicode(final String text) {
    if (text == null) {
        return null;
    }
    final int length = text.length();
    final StringBuilder decoded = new StringBuilder(length);
    int cursor = 0;
    while (cursor < length) {
        // index of the first pair whose entity starts at the cursor, -1 if there is none
        int hit = -1;
        for (int i = 0; i < mapping.length && hit < 0; i += 2) {
            if (text.startsWith(mapping[i + 1], cursor)) {
                hit = i;
            }
        }
        if (hit < 0) {
            decoded.append(text.charAt(cursor));
            cursor++;
        } else {
            decoded.append(mapping[hit]);
            cursor += mapping[hit + 1].length();
        }
    }
    return decoded.toString();
}
//This array contains codes (see http://mindprod.com/jgloss/unicode.html for details)
//that will be replaced. To add new codes or patterns, just put them at the end
//of the list. Codes or patterns in this list can not be escaped with [= or <pre>
private static final String[] mapping = {
// Ampersands _have_ to be replaced first. If they were replaced later,
// other replaced characters containing ampersands would get messed up.
"\u0026","&amp;", //ampersand
"\"","&quot;", //quotation mark
"\u003C","&lt;", //less than
"\u003E","&gt;", //greater than
private static final String[] mapping4html = {
"\\", "&#092;", // Backslash
"\u005E","&#094;", // Caret
@ -267,15 +170,109 @@ public class htmlTools {
"\u00FF","&yuml;"
};
private final static HashMap<String, Character> html2unicode4xml = new HashMap<String, Character>();
private final static HashMap<String, Character> html2unicode4html = new HashMap<String, Character>();
private final static HashMap<Character, String> unicode2html4xml = new HashMap<Character, String>();
private final static HashMap<Character, String> unicode2html4html = new HashMap<Character, String>();
static {
Character c;
for (int i = 0; i < mapping4html.length; i += 2) {
c = new Character(mapping4html[i].charAt(0));
html2unicode4html.put(mapping4html[i + 1], c);
unicode2html4html.put(c, mapping4html[i + 1]);
}
for (int i = 0; i < mapping4xml.length; i += 2) {
c = new Character(mapping4xml[i].charAt(0));
html2unicode4xml.put(mapping4xml[i + 1], c);
unicode2html4xml.put(c, mapping4xml[i + 1]);
}
}
/**
 * Encodes a string for XML output: delegates to the private three-argument
 * <code>unicode2html</code> with <code>html = false</code>, so only the characters of the
 * XML table (quotation mark, less than, greater than) and, on request, the ampersand
 * are replaced by entities.
 *
 * @param text the string to encode; <code>null</code> yields <code>null</code>
 * @param amp if <code>true</code> ampersands are encoded as well, otherwise they are
 *        copied unchanged
 * @return the encoded string
 */
public static String unicode2xml(final String text, boolean amp) {
return unicode2html(text, amp, false);
}
/**
 * Encodes a string for HTML output: delegates to the private three-argument
 * <code>unicode2html</code> with <code>html = true</code>, so the characters of the XML
 * table and those of the HTML table are replaced by entities.
 *
 * @param text the string to encode; <code>null</code> yields <code>null</code>
 * @param amp if <code>true</code> every ampersand is encoded before any table lookup
 *        (NOTE(review): with <code>false</code> an ampersand is only encoded if the HTML
 *        table itself lists it - confirm against the full table)
 * @return the encoded string
 */
public static String unicode2html(final String text, boolean amp) {
return unicode2html(text, amp, true);
}
/**
 * Shared encoder behind the public unicode2xml / unicode2html entry points.
 * For every character the replacement is chosen in this order: the ampersand entity
 * (only if <code>amp</code> is set), the XML table, the HTML table (only if
 * <code>html</code> is set). Characters without a replacement are copied unchanged.
 *
 * @param text the string to encode, may be <code>null</code>
 * @param amp  if <code>true</code> ampersands are replaced by the ampersand entity
 * @param html if <code>true</code> the HTML table is consulted in addition to the XML table
 * @return the encoded string, or <code>null</code> if <code>text</code> was <code>null</code>
 */
private static String unicode2html(final String text, boolean amp, boolean html) {
    if (text == null) {
        return null;
    }
    final int length = text.length();
    // reserve some extra room, entities are longer than the characters they replace
    final StringBuilder encoded = new StringBuilder(length * 12 / 10);
    for (int i = 0; i < length; i++) {
        final char ch = text.charAt(i);
        String entity;
        if (amp && ch == amp_unicode) {
            entity = amp_html;
        } else {
            entity = unicode2html4xml.get(ch);
            if (entity == null && html) {
                entity = unicode2html4html.get(ch);
            }
        }
        if (entity == null) {
            encoded.append(ch);
        } else {
            encoded.append(entity);
        }
    }
    return encoded.toString();
}
/**
 * Decodes the HTML entities known to this class back into their unicode characters.
 *
 * The text is scanned for <code>'&amp;'</code>; the span from there up to and including
 * the next <code>';'</code> is looked up as an entity (ampersand entity first, then the
 * XML table, then the HTML table). Text that is not a known entity is copied unchanged:
 * <ul>
 * <li>an ampersand that is not followed by any <code>';'</code> is kept literally
 *     (it used to be dropped from the output),</li>
 * <li>an ampersand that starts an unknown entity is kept literally and scanning
 *     resumes right behind it (this case used to loop forever because the read
 *     position was never advanced).</li>
 * </ul>
 *
 * @param text the string to decode, may be <code>null</code>
 * @return the decoded string, or <code>null</code> if <code>text</code> was <code>null</code>
 */
public static String html2unicode(final String text) {
    if (text == null) {
        return null;
    }
    final int length = text.length();
    final StringBuilder sb = new StringBuilder(length);
    int p = 0;
    while (p < length) {
        final int ampPos = text.indexOf('&', p);
        if (ampPos < 0) {
            // no further ampersand: the rest is plain text
            sb.append(text, p, length);
            break;
        }
        sb.append(text, p, ampPos);

        final int semiPos = text.indexOf(';', ampPos);
        if (semiPos < 0) {
            // no terminator left, so nothing behind this point can be an entity;
            // copy the rest including the ampersand itself
            sb.append(text, ampPos, length);
            break;
        }

        final String candidate = text.substring(ampPos, semiPos + 1);
        if (candidate.equals(amp_html)) {
            sb.append(amp_unicode);
            p = semiPos + 1;
            continue;
        }
        Character decoded = html2unicode4xml.get(candidate);
        if (decoded == null) {
            decoded = html2unicode4html.get(candidate);
        }
        if (decoded != null) {
            sb.append(decoded.charValue());
            p = semiPos + 1;
            continue;
        }

        // unknown entity: keep the ampersand as plain text and continue behind it, so
        // that a real entity inside the candidate span (e.g. "Q&A &amp; more;") is
        // still found and the loop always makes progress
        sb.append('&');
        p = ampPos + 1;
    }
    return sb.toString();
}
public static void main(final String[] args) {
final String text = "Test-Text mit & um zyklische &uuml; &amp; Ersetzungen auszuschliessen";
final String txet = encodeUnicode2html(text, true);
final String txet = unicode2html(text, true);
System.out.println(txet);
System.out.println(decodeHtml2Unicode(txet));
if (decodeHtml2Unicode(txet).equals(text)) System.out.println("correct");
System.out.println(html2unicode(txet));
if (html2unicode(txet).equals(text)) System.out.println("correct");
final String text2 = "encodeUnicode2xml: & \" < >";
System.out.println(text2);
System.out.println(encodeUnicode2xml(text2));
System.out.println(unicode2xml(text2, true));
}
}

View File

@ -44,7 +44,6 @@ import java.util.Properties;
import javax.swing.event.EventListenerList;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.server.serverCharBuffer;
@ -166,11 +165,11 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
if (tagname.equalsIgnoreCase("meta")) {
String name = tagopts.getProperty("name", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
} else {
name = tagopts.getProperty("http-equiv", "");
if (name.length() > 0) {
metas.put(name.toLowerCase(), htmlTools.decodeHtml2Unicode(tagopts.getProperty("content","")));
metas.put(name.toLowerCase(), htmlFilterCharacterCoding.html2unicode(tagopts.getProperty("content","")));
}
}
}

View File

@ -59,8 +59,8 @@ import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.httpclient.ChunkedInputStream;
import org.apache.commons.httpclient.ContentLengthInputStream;
import de.anomic.data.htmlTools;
import de.anomic.data.userDB;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
@ -850,7 +850,7 @@ public final class httpd implements serverHandler, Cloneable {
// 06.01.2007: decode HTML entities by [FB]
public static String decodeHtmlEntities(String s) {
// replace all entities defined in wikiCode.characters and htmlentities
s = htmlTools.decodeHtml2Unicode(s);
s = htmlFilterCharacterCoding.html2unicode(s);
// replace all other
final CharArrayWriter b = new CharArrayWriter(s.length());

View File

@ -39,7 +39,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpRemoteProxyConfig;
@ -452,14 +452,14 @@ public final class indexRepositoryReference {
pw.println(url);
}
if (format == 1) {
pw.println("<a href=\"" + url + "\">" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</a><br>");
pw.println("<a href=\"" + url + "\">" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + "</a><br>");
}
if (format == 2) {
pw.println("<item>");
pw.println("<title>" + htmlTools.encodeUnicode2html(comp.dc_title(), true, true) + "</title>");
pw.println("<title>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_title(), true) + "</title>");
pw.println("<link>" + yacyURL.escape(url) + "</link>");
if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlTools.encodeUnicode2html(comp.dc_creator(), true, true) + "</author>");
if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlTools.encodeUnicode2html(comp.dc_subject(), true, true) + "</description>");
if (comp.dc_creator().length() > 0) pw.println("<author>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_creator(), true) + "</author>");
if (comp.dc_subject().length() > 0) pw.println("<description>" + htmlFilterCharacterCoding.unicode2xml(comp.dc_subject(), true) + "</description>");
pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
pw.println("<guid isPermaLink=\"false\">" + entry.hash() + "</guid>");
pw.println("</item>");

View File

@ -26,8 +26,8 @@ import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterAbstractScraper;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.index.indexWord;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield;
@ -266,7 +266,7 @@ public final class plasmaSearchQuery {
public String queryString(final boolean encodeHTML) {
if(encodeHTML){
return htmlTools.encodeUnicode2html(this.queryString, true);
return htmlFilterCharacterCoding.unicode2html(this.queryString, true);
}
return this.queryString;
}

View File

@ -52,7 +52,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import de.anomic.data.htmlTools;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.tools.yFormatter;
@ -146,10 +146,10 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
* @param key key name as String.
* @param value a String that will be reencoded for HTML output.
* @return the modified String that was added to the map.
* @see htmlTools#encodeUnicode2html(String, boolean)
* @see htmlFilterCharacterCoding#unicode2html(String, boolean)
*/
public String putHTML(final String key, final String value) {
return putHTML(key, value, false);
return put(key, htmlFilterCharacterCoding.unicode2html(value, true));
}
/**
@ -158,8 +158,8 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
* If forXML is <code>true</code>, then only the characters <b>&amp; &quot; &lt; &gt;</b> will be
* replaced in the returned String.
*/
public String putHTML(final String key, final String value, final boolean forXML) {
return put(key, htmlTools.encodeUnicode2html(value, true, forXML));
public String putXML(final String key, final String value) {
return put(key, htmlFilterCharacterCoding.unicode2xml(value, true));
}
/**