removed the option to prevent removal of '&amp;' parts inside the
MultiProtocolURI during normalform computation, because that stripping
should always be done, and it is now also done during initialization of
the MultiProtocolURI object. The new toNormalform method takes only one
argument, which should be 'true' unless you know exactly what you are
doing.
Michael Peter Christen 2012-10-10 11:46:22 +02:00
parent 53789555b9
commit 5f0ab25382
80 changed files with 229 additions and 234 deletions
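For callers, the API change amounts to dropping the old stripAmp flag. A minimal before/after sketch (the MultiProtocolURI instance url and the result variables are illustrative, not part of the commit):

    // before this commit: the second flag controlled '&amp;' stripping
    String before = url.toNormalform(true, true);      // excludeAnchor, stripAmp

    // after this commit: '&amp;' stripping always happens (and is also done
    // when the MultiProtocolURI is constructed), so only excludeAnchor remains;
    // pass 'true' unless you know exactly what you are doing
    String after = url.toNormalform(true);

    // the former three-argument variant becomes the two-argument one:
    String noSession = url.toNormalform(true, false);  // excludeAnchor, removeSessionID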

View File

@ -199,7 +199,7 @@ public class Bookmarks {
if (urlentry != null) try {
final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay));
prop.put("mode_edit", "0"); // create mode
prop.put("mode_url", urlentry.url().toNormalform(false, true));
prop.put("mode_url", urlentry.url().toNormalform(false));
prop.putHTML("mode_title", urlentry.dc_title());
prop.putHTML("mode_description", (document == null) ? urlentry.dc_title(): document.dc_title());
prop.putHTML("mode_author", urlentry.dc_creator());

View File

@ -65,7 +65,7 @@ public class Collage {
prop.put("emb", (embed) ? "0" : "1");
if (nextOrigin != null) {
System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true, false));
System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true));
if (fifoSize == 0 || origins[fifoPos] != nextOrigin) {
fifoPos = fifoPos + 1 >= fifoMax ? 0 : fifoPos + 1;
fifoSize = fifoSize + 1 > fifoMax ? fifoMax : fifoSize + 1;
@ -99,17 +99,17 @@ public class Collage {
final long z = imgZIndex[i];
prop.put("imgurl_list_" + c + "_url",
"<a href=\"" + baseURL.toNormalform(true, false) + "\">"
+ "<img src=\"" + imageURL.toNormalform(true, false) + "\" "
"<a href=\"" + baseURL.toNormalform(true) + "\">"
+ "<img src=\"" + imageURL.toNormalform(true) + "\" "
+ "style=\""
+ ((imgWidth[i] == 0 || imgHeight[i] == 0) ? "" : "width:" + imgWidth[i] + "px;height:" + imgHeight[i] + "px;")
+ "position:absolute;top:" + (imgPosY[i] + yOffset)
+ "px;left:" + imgPosX[i]
+ "px;z-index:" + z + "\" "
+ "id=\"col" + z + "\" "
+ "alt=\"" + baseURL.toNormalform(true, false) + "\" "
+ "alt=\"" + baseURL.toNormalform(true) + "\" "
+ "onmouseover=\"raise(" + z + ")\" onmouseout=\"lower(" + z + ")\" "
+ "title=\"" + baseURL.toNormalform(true, false) + "\" />"
+ "title=\"" + baseURL.toNormalform(true) + "\" />"
+ "</a><br />");
c++;
}

View File

@ -78,8 +78,8 @@ public class CrawlCheck_p {
StringBuilder s = new StringBuilder(300);
int row = 0;
for (DigestURI u: rootURLs) {
- s.append(u.toNormalform(true, true)).append('\n');
- prop.put("table_list_" + row + "_url", u.toNormalform(true, true));
+ s.append(u.toNormalform(true)).append('\n');
+ prop.put("table_list_" + row + "_url", u.toNormalform(true));
// try to load the robots
RobotsTxtEntry robotsEntry;
@ -94,7 +94,7 @@ public class CrawlCheck_p {
robotsAllowed = !robotsEntry.isDisallowed(u);
prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true, true));
prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
}
} catch (final IOException e) {
}

View File

@ -197,7 +197,7 @@ public class CrawlResults {
urltxt = null;
continue;
}
- urlstr = urle.url().toNormalform(false, true);
+ urlstr = urle.url().toNormalform(true);
urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL
initiatorSeed = entry.getValue() == null || entry.getValue().initiatorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().initiatorHash));

View File

@ -195,7 +195,7 @@ public class CrawlStartScanner_p
if ( url != null ) {
String path =
"/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
path += "&crawlingURL=" + url.toNormalform(true, false);
path += "&crawlingURL=" + url.toNormalform(true);
WorkTables.execAPICall(
Domains.LOCALHOST,
(int) sb.getConfigLong("port", 8090),
@ -237,7 +237,7 @@ public class CrawlStartScanner_p
host = se.next();
try {
u = new DigestURI(host.getKey().url());
- urlString = u.toNormalform(true, false);
+ urlString = u.toNormalform(true);
if ( host.getValue() == Access.granted
&& Scanner.inIndex(apiCommentCache, urlString) == null ) {
String path =

View File

@ -238,10 +238,10 @@ public class Crawler_p {
// store this call as api call
if (repeat_time > 0) {
// store as scheduled api call
- sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true, false)), repeat_time, repeat_unit.substring(3));
+ sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true)), repeat_time, repeat_unit.substring(3));
} else {
// store just a protocol
- sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true, false)));
+ sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true)));
}
final boolean crawlingDomMaxCheck = "on".equals(post.get("crawlingDomMaxCheck", "off"));
@ -327,7 +327,7 @@ public class Crawler_p {
try {
Pattern mmp = Pattern.compile(newcrawlingMustMatch);
for (DigestURI u: rootURLs) {
- assert mmp.matcher(u.toNormalform(true, true)).matches() : "pattern " + mmp.toString() + " does not match url " + u.toNormalform(true, true);
+ assert mmp.matcher(u.toNormalform(true)).matches() : "pattern " + mmp.toString() + " does not match url " + u.toNormalform(true);
}
} catch (final PatternSyntaxException e) {
prop.put("info", "4"); // crawlfilter does not match url
@ -570,7 +570,7 @@ public class Crawler_p {
return "scraper cannot load URL: " + e.getMessage();
}
- final String title = scraper == null ? url.toNormalform(true, true) : scraper.dc_title();
+ final String title = scraper == null ? url.toNormalform(true) : scraper.dc_title();
final String description = scraper.dc_description();
// add the url to the crawl stack
@ -606,7 +606,7 @@ public class Crawler_p {
if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2);
// we will create always a bookmark to use this to track crawled hosts
- final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url.toNormalform(true, false), "admin");
+ final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url.toNormalform(true), "admin");
if (bookmark != null) {
bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_TITLE, title);
bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description);

View File

@ -487,7 +487,7 @@ public class IndexControlRWIs_p {
if ( url == null ) {
continue;
}
- us = url.toNormalform(false, false);
+ us = url.toNormalform(true);
if ( rn == -1 ) {
rn = entry.ranking();
}

View File

@ -179,7 +179,7 @@ public class IndexControlURLs_p {
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
} else {
- urlstring = entry.url().toNormalform(false, true);
+ urlstring = entry.url().toNormalform(true);
prop.put("urlstring", "");
sb.urlRemove(segment, urlhash.getBytes());
prop.putHTML("result", "Removed URL " + urlstring);
@ -207,7 +207,7 @@ public class IndexControlURLs_p {
prop.put("urlhash", urlhash);
final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
if (entry == null) {
prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
prop.putHTML("result", "No Entry for URL " + url.toNormalform(true));
prop.putHTML("urlstring", urlstring);
prop.put("urlhash", "");
} else {
@ -225,7 +225,7 @@ public class IndexControlURLs_p {
if (entry == null) {
prop.putHTML("result", "No Entry for URL hash " + urlhash);
} else {
prop.putHTML("urlstring", entry.url().toNormalform(false, true));
prop.putHTML("urlstring", entry.url().toNormalform(true));
prop.putAll(genUrlProfile(segment, entry, urlhash));
prop.put("statistics", 0);
}
@ -354,13 +354,13 @@ public class IndexControlURLs_p {
return prop;
}
prop.put("genUrlProfile", "2");
prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(false, true));
prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(true));
prop.put("genUrlProfile_urlhash", urlhash);
prop.put("genUrlProfile_urlDescr", entry.dc_title());
prop.put("genUrlProfile_moddate", entry.moddate().toString());
prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);
prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.url().toNormalform(false, true));
prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.url().toNormalform(true));
prop.put("genUrlProfile_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype()));
prop.put("genUrlProfile_language", entry.language());

View File

@ -59,7 +59,7 @@ public class IndexCreateLoaderQueue_p {
prop.putHTML("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
prop.put("loader-set_list_"+count+"_depth", element.depth());
prop.put("loader-set_list_"+count+"_status", element.getStatus());
prop.putHTML("loader-set_list_"+count+"_url", element.url().toNormalform(true, false));
prop.putHTML("loader-set_list_"+count+"_url", element.url().toNormalform(true));
dark = !dark;
count++;
}

View File

@ -89,14 +89,14 @@ public class IndexCreateParserErrors_p {
executorSeed = (executorHash == null) ? null : sb.peers.getConnected(ASCII.String(executorHash));
prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true));
prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false));
String cause = entry.anycause();
if (cause.startsWith(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER)) {
prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false, true) +
prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false) +
"&regex=" + cause.substring(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER.length()) + "\">test</a>) " + cause);
} else if (cause.startsWith(CrawlStacker.ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER)) {
prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false, true) +
prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false) +
"&regex=" + cause.substring(CrawlStacker.ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER.length()) + "\">test</a>) " + cause);
} else {
prop.putHTML("rejected_list_"+j+"_failreason", cause);

View File

@ -147,7 +147,7 @@ public class IndexCreateQueues_p {
prop.put("crawler_host_" + hc + "_list_" + count + "_depth", request.depth());
prop.put("crawler_host_" + hc + "_list_" + count + "_modified", daydate(request.appdate()) );
prop.putHTML("crawler_host_" + hc + "_list_" + count + "_anchor", request.name());
prop.putHTML("crawler_host_" + hc + "_list_" + count + "_url", request.url().toNormalform(false, true));
prop.putHTML("crawler_host_" + hc + "_list_" + count + "_url", request.url().toNormalform(true));
prop.put("crawler_host_" + hc + "_list_" + count + "_hash", request.url().hash());
count++;
}

View File

@ -71,7 +71,7 @@ public class IndexImportOAIPMH_p {
// set next default url
try {
final DigestURI nexturl = (rt == null) ? null : rt.resumptionURL();
- if (rt != null) prop.put("defaulturl", (nexturl == null) ? "" : nexturl.toNormalform(true, false));
+ if (rt != null) prop.put("defaulturl", (nexturl == null) ? "" : nexturl.toNormalform(true));
} catch (final MalformedURLException e) {
prop.put("defaulturl", e.getMessage());
} catch (final IOException e) {

View File

@ -203,7 +203,7 @@ public class Load_RSS_p {
prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true, false));
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
prop.put("showscheduledfeeds_list_" + apic + "_lastload", DateFormat.getDateTimeInstance().format(row.get("last_load_date", new Date())));
prop.put("showscheduledfeeds_list_" + apic + "_nextload", date_next_exec == null ? "" : DateFormat.getDateTimeInstance().format(date_next_exec));
@ -217,7 +217,7 @@ public class Load_RSS_p {
prop.put("shownewfeeds_list_" + newc + "_count", newc);
prop.putXML("shownewfeeds_list_" + newc + "_rss", messageurl);
prop.putXML("shownewfeeds_list_" + newc + "_title", row.get("title", ""));
prop.putXML("shownewfeeds_list_" + newc + "_referrer", referrer == null ? "" : referrer.toNormalform(true, false));
prop.putXML("shownewfeeds_list_" + newc + "_referrer", referrer == null ? "" : referrer.toNormalform(true));
prop.put("shownewfeeds_list_" + newc + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
newc++;
}
@ -256,7 +256,7 @@ public class Load_RSS_p {
// if we have an url then try to load the rss
RSSReader rss = null;
if (url != null) try {
prop.put("url", url.toNormalform(true, false));
prop.put("url", url.toNormalform(true));
final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay);
final byte[] resource = response == null ? null : response.getContent();
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
@ -322,7 +322,7 @@ public class Load_RSS_p {
prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);
prop.putHTML("showitems_item_" + i + "_title", item.getTitle());
prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(false, false));
prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(true));
prop.putHTML("showitems_item_" + i + "_description", item.getDescription());
prop.putHTML("showitems_item_" + i + "_language", item.getLanguage());
prop.putHTML("showitems_item_" + i + "_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
@ -334,10 +334,10 @@ public class Load_RSS_p {
}
prop.put("showitems_item", i);
prop.put("showitems_num", i);
prop.putHTML("showitems_rss", url.toNormalform(true, false));
prop.putHTML("showitems_rss", url.toNormalform(true));
if (i > 0) {
prop.put("showload", 1);
prop.put("showload_rss", url.toNormalform(true, false));
prop.put("showload_rss", url.toNormalform(true));
}
}

View File

@ -112,7 +112,7 @@ public class QuickCrawlLink_p {
if (crawlingStart != null) {
crawlingStart = crawlingStart.trim();
- try {crawlingStart = new DigestURI(crawlingStart).toNormalform(true, true);} catch (final MalformedURLException e1) {}
+ try {crawlingStart = new DigestURI(crawlingStart).toNormalform(true);} catch (final MalformedURLException e1) {}
// check if url is proper
DigestURI crawlingStartURL = null;
@ -133,7 +133,7 @@ public class QuickCrawlLink_p {
CrawlProfile pe = null;
try {
pe = new CrawlProfile(
- crawlingStartURL.toNormalform(true, false),
+ crawlingStartURL.toNormalform(true),
crawlingMustMatch, //crawlerUrlMustMatch
crawlingMustNotMatch, //crawlerUrlMustNotMatch
CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch

View File

@ -64,7 +64,7 @@ public class ServerScannerList {
host = se.next();
try {
u = new DigestURI(host.getKey().url());
- urlString = u.toNormalform(true, false);
+ urlString = u.toNormalform(true);
prop.put("servertable_list_" + i + "_edit", edit ? 1 : 0);
prop.put("servertable_list_" + i + "_edit_pk", ASCII.String(u.hash()));
prop.put("servertable_list_" + i + "_edit_count", i);

View File

@ -64,7 +64,7 @@ public class SettingsAck_p {
// get referer for backlink
final MultiProtocolURI referer = header.referer();
prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true, true));
prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true));
//if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString());
if (post == null) {

View File

@ -157,7 +157,7 @@ public class ViewFile {
prop.put("url", "");
return prop;
}
prop.put("url", url.toNormalform(false, true));
prop.put("url", url.toNormalform(true));
// loading the resource content as byte array
prop.put("error_incache", Cache.has(url.hash()) ? 1 : 0);
@ -200,7 +200,7 @@ public class ViewFile {
} else if (viewMode.equals("iframeWeb")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_WEB);
prop.put("viewMode_url", url.toNormalform(false, true));
prop.put("viewMode_url", url.toNormalform(true));
} else if (viewMode.equals("iframeCache")) {
prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
@ -209,10 +209,10 @@ public class ViewFile {
prop.put("viewMode_html", 0);
if (ext.length() > 0 && "jpg.jpeg.png.gif".indexOf(ext) >= 0) {
prop.put("viewMode_png", 1);
prop.put("viewMode_png_url", url.toNormalform(false, true));
prop.put("viewMode_png_url", url.toNormalform(true));
} else {
prop.put("viewMode_html", 1);
prop.put("viewMode_html_url", url.toNormalform(false, true));
prop.put("viewMode_html_url", url.toNormalform(true));
}
} else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("words") || viewMode.equals("links")) {
// parsing the resource content
@ -317,8 +317,8 @@ public class ViewFile {
prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
prop.put("viewMode_links_" + i + "_type", "image");
prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(true));
prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(true)));
if (entry.width() > 0 && entry.height() > 0) {
prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
} else {
@ -336,7 +336,7 @@ public class ViewFile {
if (document != null) document.close();
}
prop.put("error", "0");
prop.put("error_url", url.toNormalform(false, true));
prop.put("error_url", url.toNormalform(true));
prop.put("error_hash", urlHash);
prop.put("error_wordCount", wordCount);
prop.putHTML("error_desc", (descr.isEmpty()) ? "&nbsp;" : descr);
@ -447,8 +447,8 @@ public class ViewFile {
prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
prop.putHTML("viewMode_links_" + c + "_type", type);
prop.put("viewMode_links_" + c + "_text", text);
prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true, false)));
prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true, false));
prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true)));
prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true));
prop.put("viewMode_links_" + c + "_rel", rel);
prop.put("viewMode_links_" + c + "_name", name);
dark = !dark;

View File

@ -81,7 +81,7 @@ public class ViewImage {
if ((url == null) && (urlLicense.length() > 0)) {
url = sb.licensedURLs.releaseLicense(urlLicense);
- urlString = (url == null) ? null : url.toNormalform(true, true);
+ urlString = (url == null) ? null : url.toNormalform(true);
}
if (urlString == null) return null;

View File

@ -74,7 +74,7 @@ public class Vocabulary_p {
String t;
while (ui.hasNext()) {
DigestURI u = ui.next();
- String u0 = u.toNormalform(true, false);
+ String u0 = u.toNormalform(true);
t = "";
if (discoverFromPath) {
t = u0.substring(discoverobjectspace.length());
@ -129,7 +129,7 @@ public class Vocabulary_p {
if (post.get("add_new", "").equals("checked") && post.get("newterm", "").length() > 0) {
String objectlink = post.get("newobjectlink", "");
if (objectlink.length() > 0) try {
- objectlink = new MultiProtocolURI(objectlink).toNormalform(true, false);
+ objectlink = new MultiProtocolURI(objectlink).toNormalform(true);
} catch (MalformedURLException e) {}
vocabulary.put(post.get("newterm", ""), post.get("newsynonyms", ""), objectlink);
}

View File

@ -133,9 +133,9 @@ public class getpageinfo {
count = 0;
for (final MultiProtocolURI uri: uris) {
if (uri == null) continue;
- links.append(';').append(uri.toNormalform(true, false));
+ links.append(';').append(uri.toNormalform(true));
filter.append('|').append(uri.getProtocol()).append("://").append(uri.getHost()).append(".*");
prop.putXML("links_" + count + "_link", uri.toNormalform(true, false));
prop.putXML("links_" + count + "_link", uri.toNormalform(true));
count++;
}
prop.put("links", count);

View File

@ -133,9 +133,9 @@ public class getpageinfo_p {
count = 0;
for (final MultiProtocolURI uri: uris) {
if (uri == null) continue;
- links.append(';').append(uri.toNormalform(true, false));
+ links.append(';').append(uri.toNormalform(true));
filter.append('|').append(uri.getProtocol()).append("://").append(uri.getHost()).append(".*");
prop.putXML("links_" + count + "_link", uri.toNormalform(true, false));
prop.putXML("links_" + count + "_link", uri.toNormalform(true));
count++;
}
prop.put("links", count);

View File

@ -109,13 +109,13 @@ public class webstructure {
prop.put("references_documents_0_count", scraper.inboundLinks().size() + scraper.outboundLinks().size());
prop.put("references_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date()));
prop.put("references_documents_0_urle", url == null ? 0 : 1);
if (url != null) prop.putXML("references_documents_0_urle_url", url.toNormalform(true, false));
if (url != null) prop.putXML("references_documents_0_urle_url", url.toNormalform(true));
int d = 0;
Iterator<MultiProtocolURI> i = scraper.inboundLinks().iterator();
while (i.hasNext()) {
DigestURI refurl = new DigestURI(i.next());
byte[] refhash = refurl.hash();
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true));
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
prop.put("references_documents_0_anchors_" + d + "_outbound", 0);
d++;
@ -124,7 +124,7 @@ public class webstructure {
while (i.hasNext()) {
DigestURI refurl = new DigestURI(i.next());
byte[] refhash = refurl.hash();
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true));
prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
prop.put("references_documents_0_anchors_" + d + "_outbound", 1);
d++;
@ -152,7 +152,7 @@ public class webstructure {
prop.put("citations_documents_0_count", citations.size());
prop.put("citations_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote())));
prop.put("citations_documents_0_urle", url == null ? 0 : 1);
if (url != null) prop.putXML("citations_documents_0_urle_url", url.toNormalform(true, false));
if (url != null) prop.putXML("citations_documents_0_urle_url", url.toNormalform(true));
int d = 0;
Iterator<CitationReference> i = citations.entries();
while (i.hasNext()) {
@ -160,7 +160,7 @@ public class webstructure {
byte[] refhash = cr.urlhash();
DigestURI refurl = authenticated ? sb.getURL(refhash) : null;
prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
- if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
+ if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true));
prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);
prop.put("citations_documents_0_anchors_" + d + "_urle_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified())));
d++;

View File

@ -115,7 +115,7 @@ public class yacydoc {
prop.putXML("dc_contributor", "");
prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate()));
prop.putXML("dc_type", String.valueOf(entry.doctype()));
prop.putXML("dc_identifier", entry.url().toNormalform(false, true));
prop.putXML("dc_identifier", entry.url().toNormalform(true));
prop.putXML("dc_language", ASCII.String(entry.language()));
prop.putXML("collection", Arrays.toString(entry.collections()));
prop.put("geo_lat", entry.lat());
@ -124,7 +124,7 @@ public class yacydoc {
prop.put("yacy_urlhash", entry.url().hash());
prop.putXML("yacy_loaddate", entry.loaddate().toString());
prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true));
prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(true));
prop.put("yacy_size", entry.size());
prop.put("yacy_words", entry.wordCount());
prop.put("yacy_citations", sb.index.urlCitation().count(entry.hash()));

View File

@ -51,7 +51,7 @@ public class cytag {
StringBuilder connect = new StringBuilder();
connect.append('{');
appendJSON(connect, "time", GenericFormatter.SHORT_MILSEC_FORMATTER.format());
appendJSON(connect, "trail", (referer == null) ? "" : referer.toNormalform(false, false));
appendJSON(connect, "trail", (referer == null) ? "" : referer.toNormalform(false));
appendJSON(connect, "nick", (post == null) ? "" : post.get("nick", ""));
appendJSON(connect, "tag", (post == null) ? "" : post.get("tag", ""));
appendJSON(connect, "icon", (post == null) ? "" : post.get("icon", ""));

View File

@ -102,7 +102,7 @@ public class rct_p {
* @return
*/
private static String urlToString(final DigestURI url) {
- return (url == null ? "null" : url.toNormalform(true, false));
+ return (url == null ? "null" : url.toNormalform(true));
}
private static void listHosts(final Switchboard sb, final serverObjects prop) {

View File

@ -140,7 +140,7 @@ public final class crawlReceipt {
// Check URL against DHT blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
// URL is blacklisted
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam);
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false) + " from peer " + iam);
prop.put("delay", "9999");
return prop;
}
@ -150,7 +150,7 @@ public final class crawlReceipt {
sb.index.fulltext().putMetadata(entry);
ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
- if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));
+ if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false));
// ready for more
prop.put("delay", "10");

View File

@ -123,7 +123,7 @@ public final class transferURL {
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
@ -147,11 +147,11 @@ public final class transferURL {
}
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true));
try {
sb.index.fulltext().putMetadata(lEntry);
ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
received++;
} catch (final IOException e) {
Log.logException(e);

View File

@ -91,8 +91,8 @@ public class urls {
// create RSS entry
prop.put("item_" + c + "_title", "");
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true));
prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true));
prop.putXML("item_" + c + "_description", entry.name());
prop.put("item_" + c + "_author", "");
prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.appdate()));
@ -119,8 +119,8 @@ public class urls {
referrer = sb.getURL(entry.referrerHash());
// create RSS entry
prop.put("item_" + c + "_title", entry.dc_title());
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
prop.putXML("item_" + c + "_link", entry.url().toNormalform(true));
prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true));
prop.putXML("item_" + c + "_description", entry.dc_title());
prop.put("item_" + c + "_author", entry.dc_creator());
prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate()));

View File

@ -668,7 +668,7 @@ public class yacysearch {
if ( documents != null ) {
// create a news message
final Map<String, String> map = new HashMap<String, String>();
map.put("url", urlentry.url().toNormalform(false, true).replace(',', '|'));
map.put("url", urlentry.url().toNormalform(true).replace(',', '|'));
map.put("title", urlentry.dc_title().replace(',', ' '));
map.put("description", documents[0].dc_title().replace(',', ' '));
map.put("author", documents[0].dc_creator());

View File

@ -262,7 +262,7 @@ public class yacysearchitem {
if (ms == null) {
prop.put("content_item", "0");
} else {
- final String resultUrlstring = ms.url().toNormalform(true, false);
+ final String resultUrlstring = ms.url().toNormalform(true);
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
final String license = sb.licensedURLs.aquireLicense(ms.url());
@ -278,8 +278,8 @@ public class yacysearchitem {
prop.put("content_item_height", 0);
prop.put("content_item_attr", ""/*(ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"*/); // attributes, here: original size of image
prop.put("content_item_urlhash", ASCII.String(ms.url().hash()));
prop.put("content_item_source", ms.url().toNormalform(true, false));
prop.putXML("content_item_source-xml", ms.url().toNormalform(true, false));
prop.put("content_item_source", ms.url().toNormalform(true));
prop.putXML("content_item_source-xml", ms.url().toNormalform(true));
prop.put("content_item_sourcedom", ms.url().getHost());
prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1);
prop.put("content_item", 1);
@ -299,7 +299,7 @@ public class yacysearchitem {
if (ms == null) {
prop.put("content_item", "0");
} else {
- final String resultUrlstring = ms.url().toNormalform(true, false);
+ final String resultUrlstring = ms.url().toNormalform(true);
final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
prop.putHTML("content_item_href", resultUrlstring);
prop.putHTML("content_item_hrefshort", nxTools.shortenURLString(resultUrlstring, MAX_URL_LENGTH));

View File

@ -61,6 +61,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public static final MultiProtocolURI POISON = new MultiProtocolURI(); // poison pill for concurrent link generators
+ private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 5000;
@ -628,6 +629,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
this.searchpart = null;
} else {
this.searchpart = this.path.substring(r + 1);
// strip &amp;
Matcher matcher = ampPattern.matcher(this.searchpart);
while (matcher.find()) {
this.searchpart = matcher.replaceAll("&");
matcher.reset(this.searchpart);
}
this.path = this.path.substring(0, r);
}
}
@ -808,11 +815,11 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
@Override
public String toString() {
- return toNormalform(false, true);
+ return toNormalform(false);
}
public String toTokens() {
- return toTokens(unescape(this.toNormalform(true, true)));
+ return toTokens(unescape(this.toNormalform(true)));
}
/**
@ -881,25 +888,11 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
return CharType.high;
}
- public String toNormalform(final boolean excludeAnchor, final boolean stripAmp) {
- return toNormalform(excludeAnchor, stripAmp, false);
+ public String toNormalform(final boolean excludeAnchor) {
+ return toNormalform(excludeAnchor, false);
}
- private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
- public String toNormalform(final boolean excludeAnchor, final boolean stripAmp, final boolean removeSessionID) {
- String result = toNormalform0(excludeAnchor, removeSessionID);
- if (stripAmp) {
- Matcher matcher = ampPattern.matcher(result);
- while (matcher.find()) {
- result = matcher.replaceAll("&");
- matcher.reset(result);
- }
- }
- return result;
- }
- private String toNormalform0(final boolean excludeAnchor, final boolean removeSessionID) {
+ public String toNormalform(final boolean excludeAnchor, final boolean removeSessionID) {
// generates a normal form of the URL
boolean defaultPort = false;
if (this.protocol.equals("mailto")) {
@ -915,7 +908,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
} else if (isFile()) {
defaultPort = true;
}
- final String urlPath = this.getFile(excludeAnchor, removeSessionID);
+ String urlPath = this.getFile(excludeAnchor, removeSessionID);
String h = getHost();
final StringBuilder u = new StringBuilder(20 + urlPath.length() + ((h == null) ? 0 : h.length()));
u.append(this.protocol);
@ -932,12 +925,14 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
u.append(this.port);
}
u.append(urlPath);
- return u.toString();
+ String result = u.toString();
+ return result;
}
@Override
public int hashCode() {
- return this.toNormalform(true, true).hashCode();
+ return this.toNormalform(true).hashCode();
}
/* (non-Javadoc)
@ -967,7 +962,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
if (this.userInfo != null && h.userInfo != null && (c = this.userInfo.compareTo(h.userInfo)) != 0) return c;
if (this.path != null && h.path != null && (c = this.path.compareTo(h.path)) != 0) return c;
if (this.searchpart != null && h.searchpart != null && (c = this.searchpart.compareTo(h.searchpart)) != 0) return c;
- return toNormalform(true, true).compareTo(h.toNormalform(true, true));
+ return toNormalform(true).compareTo(h.toNormalform(true));
}
public boolean isPOST() {
@ -1895,7 +1890,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
*/
public java.net.URL getURL() throws MalformedURLException {
if (!(isHTTP() || isHTTPS() || isFTP())) throw new MalformedURLException();
- return new java.net.URL(this.toNormalform(false, true));
+ return new java.net.URL(this.toNormalform(false));
}
/**
@ -1904,7 +1899,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
*/
public java.io.File getFSFile() throws MalformedURLException {
if (!isFile()) throw new MalformedURLException();
- return new java.io.File(this.toNormalform(false, true).substring(7));
+ return new java.io.File(this.toNormalform(true).substring(7));
}
/**
@ -1914,7 +1909,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
*/
public SmbFile getSmbFile() throws MalformedURLException {
if (!isSMB()) throw new MalformedURLException();
- final String url = unescape(this.toNormalform(false, true));
+ final String url = unescape(this.toNormalform(true));
return new SmbFile(url);
}
@ -2188,8 +2183,8 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
// check stability: the normalform of the normalform must be equal to the normalform
if (aURL != null) try {
- aURL1 = new MultiProtocolURI(aURL.toNormalform(false, true));
- if (!(aURL1.toNormalform(false, true).equals(aURL.toNormalform(false, true)))) {
+ aURL1 = new MultiProtocolURI(aURL.toNormalform(false));
+ if (!(aURL1.toNormalform(false).equals(aURL.toNormalform(false)))) {
System.out.println("no stability for url:");
System.out.println("aURL0=" + aURL.toString());
System.out.println("aURL1=" + aURL1.toString());

View File

@ -56,7 +56,7 @@ public class RSSFeed implements Iterable<RSSMessage> {
String u;
RSSMessage message;
for (MultiProtocolURI uri: links) {
- u = uri.toNormalform(true, false);
+ u = uri.toNormalform(true);
message = new RSSMessage(u, "", u);
message.setAuthor(source);
this.addMessage(message);

View File

@ -113,7 +113,7 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
this.map = new HashMap<String, String>();
this.map.put("title", title);
this.map.put("description", description);
this.map.put("link", link.toNormalform(true, false));
this.map.put("link", link.toNormalform(true));
this.map.put("pubDate", ISO8601Formatter.FORMATTER.format());
this.map.put("guid", guid);
}

View File

@ -104,7 +104,7 @@ public class Scanner extends Thread {
@Override
public String toString() {
try {
- return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true, false);
+ return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true);
} catch (final MalformedURLException e) {
return "";
}

View File

@ -341,7 +341,7 @@ public class HTTPClient {
*/
public byte[] GETbytes(final MultiProtocolURI url, final int maxBytes) throws IOException {
final boolean localhost = Domains.isLocalhost(url.getHost());
- final String urix = url.toNormalform(true, false);
+ final String urix = url.toNormalform(true);
final HttpGet httpGet = new HttpGet(urix);
if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
return getContentBytes(httpGet, maxBytes);
@ -358,7 +358,7 @@ public class HTTPClient {
public void GET(final String uri) throws IOException {
if (this.currentRequest != null) throw new IOException("Client is in use!");
final MultiProtocolURI url = new MultiProtocolURI(uri);
- final HttpGet httpGet = new HttpGet(url.toNormalform(true, false));
+ final HttpGet httpGet = new HttpGet(url.toNormalform(true));
setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
this.currentRequest = httpGet;
execute(httpGet);
@ -373,7 +373,7 @@ public class HTTPClient {
*/
public HttpResponse HEADResponse(final String uri) throws IOException {
final MultiProtocolURI url = new MultiProtocolURI(uri);
- final HttpHead httpHead = new HttpHead(url.toNormalform(true, false));
+ final HttpHead httpHead = new HttpHead(url.toNormalform(true));
setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
execute(httpHead);
finish();
@ -394,7 +394,7 @@ public class HTTPClient {
public void POST(final String uri, final InputStream instream, final long length) throws IOException {
if (this.currentRequest != null) throw new IOException("Client is in use!");
final MultiProtocolURI url = new MultiProtocolURI(uri);
- final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+ final HttpPost httpPost = new HttpPost(url.toNormalform(true));
String host = url.getHost();
if (host == null) host = Domains.LOCALHOST;
setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
@ -430,7 +430,7 @@ public class HTTPClient {
* @throws IOException
*/
public byte[] POSTbytes(final MultiProtocolURI url, final String vhost, final Map<String, ContentBody> post, final boolean usegzip) throws IOException {
- final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+ final HttpPost httpPost = new HttpPost(url.toNormalform(true));
setHost(vhost); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
if (vhost == null) setHost(Domains.LOCALHOST);
@ -461,7 +461,7 @@ public class HTTPClient {
*/
public byte[] POSTbytes(final String uri, final InputStream instream, final long length) throws IOException {
final MultiProtocolURI url = new MultiProtocolURI(uri);
- final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+ final HttpPost httpPost = new HttpPost(url.toNormalform(true));
String host = url.getHost();
if (host == null) host = Domains.LOCALHOST;
setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service

View File

@ -209,7 +209,7 @@ public final class CrawlStacker {
if (replace) {
this.indexSegment.fulltext().remove(urlhash);
this.nextQueue.urlRemove(urlhash);
- String u = url.toNormalform(true, true);
+ String u = url.toNormalform(true);
if (u.endsWith("/")) {
u = u + "index.html";
} else if (!u.contains(".")) {
@ -393,7 +393,7 @@ public final class CrawlStacker {
} else if (remote) {
warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry);
}
- if (warning != null) this.log.logWarning("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning);
+ if (warning != null) this.log.logWarning("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true) + " - not pushed: " + warning);
return null;
}

View File

@ -208,14 +208,14 @@ public final class Cache {
// store the response header into the header database
final HashMap<String, String> hm = new HashMap<String, String>();
hm.putAll(responseHeader);
hm.put("@@URL", url.toNormalform(true, false));
hm.put("@@URL", url.toNormalform(true));
try {
responseHeaderDB.insert(url.hash(), hm);
} catch (final Exception e) {
fileDB.delete(url.hash());
throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
}
- if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
+ if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true));
}
/**

View File

@ -536,7 +536,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
}
public static String mustMatchSubpath(final MultiProtocolURI uri) {
- String u = uri.toNormalform(true, true);
+ String u = uri.toNormalform(true);
if (!u.endsWith("/")) {int p = u.lastIndexOf("/"); if (p > 0) u = u.substring(0, p + 1);}
return new StringBuilder(u.length() + 5).append(Pattern.quote(u)).append(".*").toString();
}

View File

@ -278,7 +278,7 @@ public class CrawlQueues {
}
try {
this.sb.indexingDocumentProcessor.enQueue(new IndexingQueueEntry(new Response(urlEntry, profile), null, null));
Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true));
} catch (final InterruptedException e) {
Log.logException(e);
}
@ -545,7 +545,7 @@ public class CrawlQueues {
* @return
*/
private static String urlToString(final DigestURI url) {
- return (url == null ? "null" : url.toNormalform(true, false));
+ return (url == null ? "null" : url.toNormalform(true));
}
public int limitCrawlJobSize() {

View File

@ -65,7 +65,7 @@ public class ResultImages {
for (final ImageEntry image: images.values()) {
// do a double-check; attention: this can be time-consuming since this possibly needs a DNS-lookup
if (image == null || image.url() == null) continue;
- String url = image.url().toNormalform(true, false);
+ String url = image.url().toNormalform(true);
if (doubleCheck.contains(url)) continue;
doubleCheck.add(url);

View File

@ -170,14 +170,14 @@ public class ZURL implements Iterable<ZURL.Entry> {
final Entry entry = new Entry(bentry, executor, workdate, workcount, reason);
put(entry);
this.stack.add(entry.hash());
if (!reason.startsWith("double")) log.logInfo(bentry.url().toNormalform(false, false) + " - " + reason);
if (!reason.startsWith("double")) log.logInfo(bentry.url().toNormalform(true) + " - " + reason);
if (this.solrConnector != null && failCategory.store) {
// send the error to solr
try {
SolrInputDocument errorDoc = this.solrConfiguration.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
this.solrConnector.add(errorDoc);
} catch (final IOException e) {
Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());
}
}
while (this.stack.size() > maxStackSize) this.stack.poll();

View File

@ -117,7 +117,7 @@ public class FTPLoader {
final RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
final DigestURI u = this.sb.getURL(request.referrerhash());
- if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false));
+ if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true));
}
final StringBuilder dirList = ftpClient.dirhtml(path);
@ -224,7 +224,7 @@ public class FTPLoader {
final RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
final DigestURI refurl = this.sb.getURL(request.referrerhash());
- if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
+ if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true));
}
final ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(fileDate));

View File

@ -63,14 +63,14 @@ public class FileLoader {
RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
DigestURI ur = this.sb.getURL(request.referrerhash());
- if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
+ if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true));
}
// process directories: transform them to html with meta robots=noindex (using the ftpc lib)
String[] l = null;
try {l = url.list();} catch (IOException e) {}
if (l != null) {
- String u = url.toNormalform(true, true);
+ String u = url.toNormalform(true);
List<String> list = new ArrayList<String>();
for (String s: l) {
list.add(u + ((u.endsWith("/") || u.endsWith("\\")) ? "" : "/") + s);

View File

@ -119,7 +119,7 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
- if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
+ if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));
@ -135,7 +135,7 @@ public final class HTTPLoader {
final byte[] responseBody = client.GETbytes(url, maxFileSize);
final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
- String requestURLString = request.url().toNormalform(false, false);
+ String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {

View File

@ -70,11 +70,11 @@ public class RSSLoader extends Thread {
Log.logWarning("Load_RSS", "rss loading for url '" + getName().substring(9) + "' failed: " + e.getMessage());
return;
} catch (final IOException e) {
Log.logWarning("Load_RSS", "rss loading for url '" + this.urlf.toNormalform(true, false) + "' failed: " + e.getMessage());
Log.logWarning("Load_RSS", "rss loading for url '" + this.urlf.toNormalform(true) + "' failed: " + e.getMessage());
return;
}
if (rss == null) {
Log.logWarning("Load_RSS", "no rss for url " + this.urlf.toNormalform(true, false));
Log.logWarning("Load_RSS", "no rss for url " + this.urlf.toNormalform(true));
return;
}
final RSSFeed feed = rss.getFeed();
@ -111,7 +111,7 @@ public class RSSLoader extends Thread {
final int lastAvg = rssRow.get("avg_upd_per_day", 0);
final long thisAvg = 1000 * 60 * 60 * 24 / deltaTime * loadCount;
final long nextAvg = lastAvg == 0 ? thisAvg : (thisAvg + lastAvg * 2) / 3;
rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
rssRow.put("url", UTF8.getBytes(url.toNormalform(true)));
rssRow.put("title", feed.getChannel().getTitle());
rssRow.put("last_load_date", new Date());
rssRow.put("last_load_count", loadCount);
@ -130,20 +130,20 @@ public class RSSLoader extends Thread {
// record API action
byte[] pk = null;
final serverObjects post = new serverObjects();
post.put("url", url.toNormalform(true, false));
post.put("url", url.toNormalform(true));
post.put("indexAllItemContent", "");
if (apicall_pk != null) post.put(WorkTables.TABLE_API_COL_APICALL_PK, apicall_pk);
if (repeat_time > 0) {
// store as scheduled api call
- pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false), repeat_time, repeat_unit.substring(3));
+ pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true), repeat_time, repeat_unit.substring(3));
} else {
// store just a protocol
- pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false));
+ pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true));
}
// store pk of api table into rss table to show that the entry has been recorded
assert pk != null;
final Tables.Data rssRow = new Tables.Data();
rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
rssRow.put("url", UTF8.getBytes(url.toNormalform(true)));
rssRow.put("title", feed.getChannel().getTitle());
rssRow.put("api_pk", pk);
try {

View File

@ -75,14 +75,14 @@ public class SMBLoader {
RequestHeader requestHeader = new RequestHeader();
if (request.referrerhash() != null) {
DigestURI ur = this.sb.getURL(request.referrerhash());
- if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
+ if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true));
}
// process directories: transform them to html with meta robots=noindex (using the ftpc lib)
String[] l = null;
try {l = url.list();} catch (IOException e) {}
if (l != null) {
- String u = url.toNormalform(true, true);
+ String u = url.toNormalform(true);
List<String> list = new ArrayList<String>();
for (String s: l) {
if (s.startsWith(".")) continue;

View File

@ -307,7 +307,7 @@ public class RobotsTxt {
reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
// adding referer
- reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
+ reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true));
reqHeaders.put(HeaderFramework.ACCEPT, HTTPLoader.DEFAULT_ACCEPT);
if (entry != null) {
oldEtag = entry.getETag();

View File

@ -496,7 +496,7 @@ public class BookmarksDB {
public Bookmark(final DigestURI url) {
this.entry = new HashMap<String, String>();
this.urlHash = ASCII.String(url.hash());
- this.entry.put(BOOKMARK_URL, url.toNormalform(false, true));
+ this.entry.put(BOOKMARK_URL, url.toNormalform(false));
this.tagNames = new HashSet<String>();
this.timestamp = System.currentTimeMillis();
final Bookmark oldBm=getBookmark(this.urlHash);

View File

@ -313,7 +313,7 @@ public class WorkTables extends Tables {
// create and insert new entry
Data data = new Data();
byte[] date = UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format());
- data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true, false));
+ data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true));
data.put(TABLE_SEARCH_FAILURE_COL_DATE, date);
data.put(TABLE_SEARCH_FAILURE_COL_WORDS, queryHashes.export());
data.put(TABLE_SEARCH_FAILURE_COL_COMMENT, UTF8.getBytes(reason));

View File

@ -173,7 +173,7 @@ public class YMarkCrawlStart extends HashMap<String,String>{
final int depth,
final boolean crawlingQ, final boolean medialink) {
final CrawlProfile pe = new CrawlProfile(
- (startURL.getHost() == null) ? startURL.toNormalform(true, false) : startURL.getHost(),
+ (startURL.getHost() == null) ? startURL.toNormalform(true) : startURL.getHost(),
urlMustMatch,
urlMustNotMatch,
CrawlProfile.MATCH_ALL_STRING,

View File

@ -372,7 +372,7 @@ public class YMarkTables {
final YMarkMetadata meta = new YMarkMetadata(url);
final Document document = meta.loadDocument(loader);
final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();
- final String urls = url.toNormalform(true, false);
+ final String urls = url.toNormalform(true);
bmk_entry.put(YMarkEntry.BOOKMARK.URL.key(), urls);
if(!this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(urls))) {
bmk_entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");

View File

@ -159,7 +159,7 @@ public final class Condenser {
Iterator<Map.Entry<MultiProtocolURI, String>> i = document.getAudiolinks().entrySet().iterator();
while (i.hasNext()) {
entry = i.next();
- insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasaudio, this.RESULT_FLAGS, false, meaningLib);
+ insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasaudio, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasaudio, this.RESULT_FLAGS, true, meaningLib);
}
@ -167,7 +167,7 @@ public final class Condenser {
i = document.getVideolinks().entrySet().iterator();
while (i.hasNext()) {
entry = i.next();
insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasvideo, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasvideo, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasvideo, this.RESULT_FLAGS, true, meaningLib);
}
@ -175,7 +175,7 @@ public final class Condenser {
i = document.getApplinks().entrySet().iterator();
while (i.hasNext()) {
entry = i.next();
insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasapp, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasapp, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasapp, this.RESULT_FLAGS, true, meaningLib);
}
@ -187,7 +187,7 @@ public final class Condenser {
ientry = j.next();
url = ientry.url();
if (url == null) continue;
insertTextToWords(new SentenceReader(url.toNormalform(false, false)), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(url.toNormalform(true)), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
insertTextToWords(new SentenceReader(ientry.alt()), 99, flag_cat_hasimage, this.RESULT_FLAGS, true, meaningLib);
}
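
The Condenser hunks above do more than drop an argument: the old calls passed false as the first flag, the new calls pass true. If that first flag is the exclude-reference switch (an assumption read off the call sites, not confirmed from the MultiProtocolURI source), the words extracted for audio, video, app and image links now come from URLs with the #fragment stripped. A plain-JDK illustration of the difference:

    import java.net.URI;

    public class ReferenceStripDemo {
        public static void main(String[] args) {
            URI audio = URI.create("http://example.com/track.mp3#t=10");
            // roughly what the old toNormalform(false, false) produced: reference kept
            System.out.println(audio);
            // roughly what the new toNormalform(true) produces: reference dropped
            System.out.println(audio.toString().split("#")[0]);
        }
    }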

View File

@ -247,7 +247,7 @@ dc_rights
}
// put to triplestore
JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1));
JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true, false));
JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true));
}
}
@ -290,7 +290,7 @@ dc_rights
}
public String dc_identifier() {
return this.source.toNormalform(true, false);
return this.source.toNormalform(true);
}
public MultiProtocolURI dc_source() {
@ -482,7 +482,7 @@ dc_rights
} else {
this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
}
u = url.toNormalform(true, false);
u = url.toNormalform(true);
final String name = entry.getValue().getProperty("name", "");
if (u.startsWith("mailto:")) {
this.emaillinks.put(u.substring(7), name);
@ -552,7 +552,7 @@ dc_rights
assert false;
continue;
}
u = url.toNormalform(true, true);
u = url.toNormalform(true);
if (u.endsWith("/"))
u = u.substring(0, u.length() - 1);
pos = u.lastIndexOf('/');
@ -603,7 +603,7 @@ dc_rights
continue loop;
}
if (url == null) continue loop;
u = url.toNormalform(true, true);
u = url.toNormalform(true);
if ((pos = u.toLowerCase().indexOf("http://", 7)) > 0) {
i.remove();
u = u.substring(pos);

View File

@ -97,12 +97,12 @@ public interface Parser {
}
public Failure(final String message, final MultiProtocolURI url) {
super(message + "; url = " + url.toNormalform(true, false));
super(message + "; url = " + url.toNormalform(true));
this.url = url;
}
public Failure(final String message, final MultiProtocolURI url, Throwable e) {
super(message + "; url = " + url.toNormalform(true, false), e);
super(message + "; url = " + url.toNormalform(true), e);
this.url = url;
}

View File

@ -198,7 +198,7 @@ public final class TextParser {
AbstractParser.log.logWarning(errorMsg);
throw new Parser.Failure(errorMsg, location);
}
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true, false);
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);
Document[] docs = parseSource(location, mimeType, idioms, charset, content);
@ -222,7 +222,7 @@ public final class TextParser {
AbstractParser.log.logWarning(errorMsg);
throw new Parser.Failure(errorMsg, location);
}
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true, false);
assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);
// if we do not have more than one parser or the content size is over MaxInt
// then we use only one stream-oriented parser.
@ -315,7 +315,7 @@ public final class TextParser {
}
String failedParsers = "";
for (final Map.Entry<Parser, Parser.Failure> error: failedParser.entrySet()) {
AbstractParser.log.logWarning("tried parser '" + error.getKey().getName() + "' to parse " + location.toNormalform(true, false) + " but failed: " + error.getValue().getMessage(), error.getValue());
AbstractParser.log.logWarning("tried parser '" + error.getKey().getName() + "' to parse " + location.toNormalform(true) + " but failed: " + error.getValue().getMessage(), error.getValue());
failedParsers += error.getKey().getName() + " ";
}
throw new Parser.Failure("All parser failed: " + failedParsers, location);

View File

@ -68,7 +68,7 @@ public class DCEntry extends TreeMap<String, String> {
double lon
) {
super((Collator) insensitiveCollator.clone());
this.put("dc:identifier", url.toNormalform(true, false));
this.put("dc:identifier", url.toNormalform(true));
this.put("dc:date", ISO8601Formatter.FORMATTER.format(date));
this.put("dc:title", title);
this.put("dc:creator", author);

View File

@ -116,7 +116,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM
@Override
public String source() {
return this.source.toNormalform(true, false);
return this.source.toNormalform(true);
}
@Override

View File

@ -49,7 +49,7 @@ public class OAIPMHLoader {
this.source = source;
// load the file from the net
Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true, false));
Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true));
Response response = null;
IOException ee = null;
for (int i = 0; i < 5; i++) {
@ -58,7 +58,7 @@ public class OAIPMHLoader {
response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay);
break;
} catch (IOException e) {
Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true, false));
Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true));
ee = e;
continue;
}
@ -80,7 +80,7 @@ public class OAIPMHLoader {
}
public String source() {
return this.source.toNormalform(true, false);
return this.source.toNormalform(true);
}
public static StringBuilder escape(final String s) {

View File

@ -106,7 +106,7 @@ public class ResumptionToken extends TreeMap<String, String> {
* @return a string containing the url up to and including the '?'
*/
public static String truncatedURL(final DigestURI url) {
String u = url.toNormalform(true, true);
String u = url.toNormalform(true);
final int i = u.indexOf('?');
if (i > 0) u = u.substring(0, i + 1);
return u;
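
The javadoc above specifies the behaviour completely, so a standalone re-implementation is easy to check against it (java.net.URI stands in for DigestURI, and toString approximates the toNormalform(true) step):

    import java.net.URI;

    public class TruncateDemo {
        static String truncatedURL(URI url) {
            String u = url.toString(); // stand-in for url.toNormalform(true)
            final int i = u.indexOf('?');
            if (i > 0) u = u.substring(0, i + 1);
            return u;
        }
        public static void main(String[] args) {
            URI u = URI.create("http://example.org/oai?verb=ListRecords&metadataPrefix=oai_dc");
            System.out.println(truncatedURL(u)); // prints: http://example.org/oai?
        }
    }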

View File

@ -86,7 +86,7 @@ public class AugmentParser extends AbstractParser implements Parser {
it = Switchboard.getSwitchboard().tables.orderBy(it, -1, "timestamp_creation").iterator();
while (it.hasNext()) {
net.yacy.kelondro.blob.Tables.Row r = it.next();
if (r.get("url", "").equals (url.toNormalform(false, false))) {
if (r.get("url", "").equals (url.toNormalform(false))) {
Set<String> tags = new HashSet<String>();
for (String s : YMarkUtil.keysStringToSet(r.get("scitag", ""))) {
tags.add(s);

View File

@ -185,7 +185,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.htmlFilterEventListeners = new EventListenerList();
this.lon = 0.0d;
this.lat = 0.0d;
this.evaluationScores.match(Element.url, root.toNormalform(false, false));
this.evaluationScores.match(Element.url, root.toNormalform(true));
this.canonical = null;
this.breadcrumbs = 0;
}
@ -348,10 +348,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
} catch (final MalformedURLException e) {}
} else if (tagname.equalsIgnoreCase("frame")) {
final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
tagopts.put("src", src.toNormalform(true, false));
tagopts.put("src", src.toNormalform(true));
mergeAnchors(src, tagopts /* with property "name" */);
this.frames.add(src);
this.evaluationScores.match(Element.framepath, src.toNormalform(true, false));
this.evaluationScores.match(Element.framepath, src.toNormalform(true));
} else if (tagname.equalsIgnoreCase("body")) {
final String c = tagopts.getProperty("class", EMPTY_STRING);
this.evaluationScores.match(Element.bodyclass, c);
@ -386,7 +386,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (href.length() > 0) {
tagopts.put("nme", areatitle);
MultiProtocolURI url = absolutePath(href);
tagopts.put("href", url.toNormalform(true, false));
tagopts.put("href", url.toNormalform(true));
mergeAnchors(url, tagopts);
}
} else if (tagname.equalsIgnoreCase("link")) {
@ -394,7 +394,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final MultiProtocolURI newLink = absolutePath(href);
if (newLink != null) {
tagopts.put("href", newLink.toNormalform(true, false));
tagopts.put("href", newLink.toNormalform(true));
final String rel = tagopts.getProperty("rel", EMPTY_STRING);
final String linktitle = tagopts.getProperty("title", EMPTY_STRING);
final String type = tagopts.getProperty("type", EMPTY_STRING);
@ -425,7 +425,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (url != null) {
final int width = Integer.parseInt(tagopts.getProperty("width", "-1"));
final int height = Integer.parseInt(tagopts.getProperty("height", "-1"));
tagopts.put("src", url.toNormalform(true, false));
tagopts.put("src", url.toNormalform(true));
final EmbedEntry ie = new EmbedEntry(url, width, height, tagopts.getProperty("type", EMPTY_STRING), tagopts.getProperty("pluginspage", EMPTY_STRING));
this.embeds.put(url, ie);
mergeAnchors(url, tagopts);
@ -436,15 +436,15 @@ public class ContentScraper extends AbstractScraper implements Scraper {
final String name = tagopts.getProperty("name", EMPTY_STRING);
if (name.equalsIgnoreCase("movie")) {
MultiProtocolURI url = absolutePath(tagopts.getProperty("value", EMPTY_STRING));
tagopts.put("value", url.toNormalform(true, false));
tagopts.put("value", url.toNormalform(true));
mergeAnchors(url, tagopts /* with property "name" */);
}
} else if (tagname.equalsIgnoreCase("iframe")) {
final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
tagopts.put("src", src.toNormalform(true, false));
tagopts.put("src", src.toNormalform(true));
mergeAnchors(src, tagopts /* with property "name" */);
this.iframes.add(src);
this.evaluationScores.match(Element.iframepath, src.toNormalform(true, false));
this.evaluationScores.match(Element.iframepath, src.toNormalform(true));
} else if (tagname.equalsIgnoreCase("html")) {
final String lang = tagopts.getProperty("lang", EMPTY_STRING);
if (!lang.isEmpty()) // fake a language meta to preserve detection from <html lang="xx" />
@ -471,7 +471,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
addImage(this.images, ie);
} else {
tagopts.put("text", recursiveParse(text));
tagopts.put("href", url.toNormalform(true, false)); // we must assign this because the url may have resolved backpaths and may not be absolute
tagopts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
mergeAnchors(url, tagopts);
}
}
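
The comment on the href re-assignment above explains why anchors are normalized again at this point: an href found in the page may be relative or contain backpaths, and only the resolved form is worth storing. A plain-JDK sketch of the resolution that absolutePath presumably performs before toNormalform:

    import java.net.URI;

    public class BackpathDemo {
        public static void main(String[] args) {
            URI base = URI.create("http://example.com/a/b/page.html");
            URI resolved = base.resolve("../c/other.html"); // href as found in the page
            System.out.println(resolved); // prints: http://example.com/a/c/other.html
        }
    }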

View File

@ -58,7 +58,7 @@ public class EmbedEntry {
@Override
public String toString() {
return "<embed url=\"" + this.url.toNormalform(false, false) + "\"" +
return "<embed url=\"" + this.url.toNormalform(false) + "\"" +
(this.type != null && this.type.length() > 0 ? " type=\"" + this.type + "\"" : "") +
(this.pluginspage != null && this.pluginspage.length() > 0 ? " pluginspage=\"" + this.pluginspage + "\"" : "") +
(this.width >= 0 ? " width=\"" + this.width + "\"" : "") +

View File

@ -66,7 +66,7 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry
@Override
public String toString() {
return "<img url=\"" + this.url.toNormalform(false, false) + "\"" +
return "<img url=\"" + this.url.toNormalform(false) + "\"" +
(this.alt != null && this.alt.length() > 0 ? " alt=\"" + this.alt + "\"" : "") +
(this.width >= 0 ? " width=\"" + this.width + "\"" : "") +
(this.height >= 0 ? " height=\"" + this.height + "\"" : "") +
@ -91,7 +91,7 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry
// assuming that hashCode would return a 'perfect hash' this method would
// create a total ordering on images with respect to the image size
assert (this.url != null);
if (this.url.toNormalform(true, true).equals((h).url.toNormalform(true, true))) return 0;
if (this.url.toNormalform(true).equals((h).url.toNormalform(true))) return 0;
final int thc = this.hashCode();
final int ohc = (h).hashCode();
if (thc < ohc) return -1;

View File

@ -111,7 +111,7 @@ public class sitemapParser extends AbstractParser implements Parser {
public static SitemapReader parse(final DigestURI sitemapURL) throws IOException {
// download document
Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true, false));
Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();

View File

@ -138,8 +138,8 @@ public class AugmentHtmlStream {
d.head().append ("<script type='text/javascript'>"+loadInternal("interaction_elements/interaction_metadata.js", requestHeader)+"</script>");
d.body().append (loadInternal("interaction_elements/OverlayInteraction.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader));
d.body().append (loadInternal("interaction_elements/Footer.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader));
d.body().append (loadInternal("interaction_elements/OverlayInteraction.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false), requestHeader));
d.body().append (loadInternal("interaction_elements/Footer.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false), requestHeader));
}

View File

@ -242,7 +242,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
final StringBuilder hashs = new StringBuilder(12);
assert hashs.length() == 0;
// form the 'local' part of the hash
final String normalform = toNormalform(true, true, true);
final String normalform = toNormalform(true, true);
final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
if (b64l.length() < 5) return null;
hashs.append(b64l.substring(0, 5)); // 5 chars
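
This is the one spot where two flags survive: the hash input is produced by toNormalform(true, true), and only the former third parameter disappears. A plain-JDK approximation of the 'local part' step shown in the hunk (YaCy's Base64Order.enhancedCoder uses a custom alphabet, so the real 5-character prefix differs from this sketch):

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.util.Base64;

    public class HashLocalPartDemo {
        public static void main(String[] args) throws Exception {
            String normalform = "http://example.com/"; // assumed toNormalform(true, true) output
            byte[] md5 = MessageDigest.getInstance("MD5").digest(normalform.getBytes(StandardCharsets.UTF_8));
            String b64l = Base64.getUrlEncoder().withoutPadding().encodeToString(md5);
            System.out.println(b64l.substring(0, 5)); // 5-char local hash part (approximation)
        }
    }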

View File

@ -346,7 +346,7 @@ public class URIMetadataNode implements URIMetadata {
try {
s.append("hash=").append(ASCII.String(md.hash()));
s.append(",url=").append(crypt.simpleEncode(md.url().toNormalform(false, true)));
s.append(",url=").append(crypt.simpleEncode(md.url().toNormalform(true)));
s.append(",descr=").append(crypt.simpleEncode(md.dc_title()));
s.append(",author=").append(crypt.simpleEncode(md.dc_creator()));
s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(md.dc_subject())));

View File

@ -197,7 +197,7 @@ public class URIMetadataRow implements URIMetadata {
final double lat,
final double lon) {
final CharBuffer s = new CharBuffer(3600, 360);
s.append(url.toNormalform(false, true)).appendLF();
s.append(url.toNormalform(true)).appendLF();
s.append(dc_title).appendLF();
if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
s.appendLF();
@ -585,7 +585,7 @@ public class URIMetadataRow implements URIMetadata {
}
public boolean matches(final Pattern matcher) {
if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
if (this.url != null) return matcher.matcher(this.url.toNormalform(true).toLowerCase()).matches();
return false;
}
public DigestURI url() {

View File

@ -81,7 +81,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.lother = md.lother();
this.positions = new LinkedBlockingQueue<Integer>();
this.positions.add(1);
String urlNormalform = md.url().toNormalform(true, false);
String urlNormalform = md.url().toNormalform(true);
this.urlcomps = MultiProtocolURI.urlComps(urlNormalform).length;
this.urllength = urlNormalform.length();
this.virtualAge = -1; // compute that later

View File

@ -209,7 +209,7 @@ public final class LoaderDispatcher {
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
final Response response = new Response(
request,
requestHeader,
@ -223,7 +223,7 @@ public final class LoaderDispatcher {
// well, just take the cache and don't care about freshness of the content
final byte[] content = Cache.getContent(url.hash());
if (content != null) {
this.log.logInfo("cache hit/useall for: " + url.toNormalform(true, false));
this.log.logInfo("cache hit/useall for: " + url.toNormalform(true));
response.setContent(content);
return response;
}
@ -234,14 +234,14 @@ public final class LoaderDispatcher {
if (response.isFreshForProxy()) {
final byte[] content = Cache.getContent(url.hash());
if (content != null) {
this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true, false));
this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true));
response.setContent(content);
return response;
}
}
this.log.logInfo("cache hit/stale for: " + url.toNormalform(true, false));
this.log.logInfo("cache hit/stale for: " + url.toNormalform(true));
} else if (cachedResponse != null) {
this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true, false));
this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
}
}

View File

@ -2388,7 +2388,7 @@ public final class Switchboard extends serverSwitch
(
response.profile() == null ||
response.depth() < response.profile().depth() ||
response.profile().crawlerNoDepthLimitMatchPattern().matcher(response.url().toNormalform(false, false)).matches()
response.profile().crawlerNoDepthLimitMatchPattern().matcher(response.url().toNormalform(true)).matches()
)
) {
// get the hyperlinks
@ -2410,7 +2410,7 @@ public final class Switchboard extends serverSwitch
// process the next hyperlink
nextUrl = nextEntry.getKey();
String u = nextUrl.toNormalform(true, true, true);
String u = nextUrl.toNormalform(true, true);
if ( !(u.startsWith("http://")
|| u.startsWith("https://")
|| u.startsWith("ftp://")
@ -2447,7 +2447,7 @@ public final class Switchboard extends serverSwitch
this.log.logInfo("CRAWL: ADDED "
+ hl.size()
+ " LINKS FROM "
+ response.url().toNormalform(false, true)
+ response.url().toNormalform(true)
+ ", STACKING TIME = "
+ (stackEndTime - stackStartTime)
+ ", PARSING TIME = "
@ -2460,7 +2460,7 @@ public final class Switchboard extends serverSwitch
public IndexingQueueEntry condenseDocument(final IndexingQueueEntry in) {
in.queueEntry.updateStatus(Response.QUEUE_STATE_CONDENSING);
CrawlProfile profile = in.queueEntry.profile();
String urls = in.queueEntry.url().toNormalform(false, true);
String urls = in.queueEntry.url().toNormalform(true);
// check profile attributes which prevent indexing (while crawling is allowed)
if (!profile.indexText() && !profile.indexMedia()) {
@ -2616,7 +2616,7 @@ public final class Switchboard extends serverSwitch
for ( final Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet() ) {
final Tables.Data rssRow = new Tables.Data();
rssRow.put("referrer", url.hash());
rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true, false)));
rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true)));
rssRow.put("title", UTF8.getBytes(rssEntry.getValue()));
rssRow.put("recording_date", new Date());
try {
@ -2643,7 +2643,7 @@ public final class Switchboard extends serverSwitch
EventTracker.update(EventTracker.EClass.PPM, Long.valueOf(currentPPM()), true);
lastPPMUpdate = System.currentTimeMillis();
}
EventTracker.update(EventTracker.EClass.INDEX, url.toNormalform(true, false), false);
EventTracker.update(EventTracker.EClass.INDEX, url.toNormalform(true), false);
// if this was performed for a remote crawl request, notify requester
if ( (processCase == EventOrigin.GLOBAL_CRAWLING) && (queueEntry.initiator() != null) ) {
@ -2719,7 +2719,7 @@ public final class Switchboard extends serverSwitch
final Request request = this.loader.request(url, true, true);
final CrawlProfile profile = sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
final String urls = url.toNormalform(false, false);
final String urls = url.toNormalform(true);
if ( acceptedError != null ) {
this.log.logWarning("addToIndex: cannot load "
+ urls
@ -2759,18 +2759,18 @@ public final class Switchboard extends serverSwitch
searchEvent,
"heuristic:" + heuristicName);
Switchboard.this.log.logInfo("addToIndex fill of url "
+ url.toNormalform(true, true)
+ url.toNormalform(true)
+ " finished");
}
}
} catch ( final IOException e ) {
Switchboard.this.log.logWarning("addToIndex: failed loading "
+ url.toNormalform(false, false)
+ url.toNormalform(true)
+ ": "
+ e.getMessage());
} catch ( final Parser.Failure e ) {
Switchboard.this.log.logWarning("addToIndex: failed parsing "
+ url.toNormalform(false, false)
+ url.toNormalform(true)
+ ": "
+ e.getMessage());
}
@ -2796,7 +2796,7 @@ public final class Switchboard extends serverSwitch
final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
if (acceptedError != null) {
this.log.logInfo("addToCrawler: cannot load "
+ url.toNormalform(false, false)
+ url.toNormalform(true)
+ ": "
+ acceptedError);
return;
@ -2810,7 +2810,7 @@ public final class Switchboard extends serverSwitch
if (s != null) {
Switchboard.this.log.logInfo("addToCrawler: failed to add "
+ url.toNormalform(false, false)
+ url.toNormalform(true)
+ ": "
+ s);
}
@ -2840,7 +2840,7 @@ public final class Switchboard extends serverSwitch
"");
if ( response == null ) {
Switchboard.this.log.logInfo("Sending crawl receipt for '"
+ this.reference.url().toNormalform(false, true)
+ this.reference.url().toNormalform(true)
+ "' to "
+ this.initiatorPeer.getName()
+ " FAILED, send time = "
@ -2849,7 +2849,7 @@ public final class Switchboard extends serverSwitch
}
final String delay = response.get("delay");
Switchboard.this.log.logInfo("Sending crawl receipt for '"
+ this.reference.url().toNormalform(false, true)
+ this.reference.url().toNormalform(true)
+ "' to "
+ this.initiatorPeer.getName()
+ " success, delay = "

View File

@ -557,7 +557,7 @@ public final class Fulltext implements Iterable<byte[]> {
while (i.hasNext()) {
entry = i.next();
if (this.set != null && !this.set.has(entry.hash())) continue;
url = entry.url().toNormalform(true, false);
url = entry.url().toNormalform(true);
if (!url.matches(this.filter)) continue;
if (this.format == 0) {
pw.println(url);

View File

@ -218,7 +218,7 @@ public class Segment {
String hh = DigestURI.hosthash(host);
final BlockingQueue<String> hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000);
final String urlstub = stub.toNormalform(false, false);
final String urlstub = stub.toNormalform(true);
// now filter the stub from the iterated urls
return new LookAheadIterator<DigestURI>() {
@ -234,7 +234,7 @@ public class Segment {
}
if (id == null || id == AbstractSolrConnector.POISON_ID) return null;
DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url();
if (u.toNormalform(true, false).startsWith(urlstub)) return u;
if (u.toNormalform(true).startsWith(urlstub)) return u;
}
}
};
@ -361,7 +361,7 @@ public class Segment {
// load some document metadata
final String id = ASCII.String(url.hash());
final String dc_title = document.dc_title();
final String urlNormalform = url.toNormalform(true, false);
final String urlNormalform = url.toNormalform(true);
final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language
// STORE URL TO LOADED-URL-DB

View File

@ -218,7 +218,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains(YaCySchema.failreason_t)) add(doc, YaCySchema.failreason_t, "");
add(doc, YaCySchema.id, ASCII.String(md.hash()));
String us = digestURI.toNormalform(true, false);
String us = digestURI.toNormalform(true);
add(doc, YaCySchema.sku, us);
if (allAttr || contains(YaCySchema.ip_s)) {
final InetAddress address = digestURI.getInetAddress();
@ -345,7 +345,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
boolean allAttr = this.isEmpty();
add(doc, YaCySchema.id, id);
if (allAttr || contains(YaCySchema.failreason_t)) add(doc, YaCySchema.failreason_t, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
String us = digestURI.toNormalform(true, false);
String us = digestURI.toNormalform(true);
add(doc, YaCySchema.sku, us);
if (allAttr || contains(YaCySchema.ip_s)) {
final InetAddress address = digestURI.getInetAddress();
@ -562,7 +562,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final String[] css_url = new String[csss.size()];
c = 0;
for (final Map.Entry<MultiProtocolURI, String> entry: csss.entrySet()) {
final String url = entry.getKey().toNormalform(false, false);
final String url = entry.getKey().toNormalform(false);
inboundLinks.remove(url);
outboundLinks.remove(url);
css_tag[c] =
@ -584,7 +584,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
for (final MultiProtocolURI url: scriptss) {
inboundLinks.remove(url);
outboundLinks.remove(url);
scripts[c++] = url.toNormalform(false, false);
scripts[c++] = url.toNormalform(false);
}
add(doc, YaCySchema.scriptscount_i, scripts.length);
if (scripts.length > 0) add(doc, YaCySchema.scripts_txt, scripts);
@ -598,7 +598,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
for (final MultiProtocolURI url: framess) {
inboundLinks.remove(url);
outboundLinks.remove(url);
frames[c++] = url.toNormalform(false, false);
frames[c++] = url.toNormalform(false);
}
add(doc, YaCySchema.framesscount_i, frames.length);
if (frames.length > 0) add(doc, YaCySchema.frames_txt, frames);
@ -612,7 +612,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
for (final MultiProtocolURI url: iframess) {
inboundLinks.remove(url);
outboundLinks.remove(url);
iframes[c++] = url.toNormalform(false, false);
iframes[c++] = url.toNormalform(false);
}
add(doc, YaCySchema.iframesscount_i, iframes.length);
if (iframes.length > 0) add(doc, YaCySchema.iframes_txt, iframes);
@ -624,7 +624,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (canonical != null) {
inboundLinks.remove(canonical);
outboundLinks.remove(canonical);
add(doc, YaCySchema.canonical_t, canonical.toNormalform(false, false));
add(doc, YaCySchema.canonical_t, canonical.toNormalform(false));
}
}
@ -638,7 +638,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (refreshURL != null) {
inboundLinks.remove(refreshURL);
outboundLinks.remove(refreshURL);
add(doc, YaCySchema.refresh_s, refreshURL.toNormalform(false, false));
add(doc, YaCySchema.refresh_s, refreshURL.toNormalform(false));
}
} catch (MalformedURLException e) {
add(doc, YaCySchema.refresh_s, refresh);
@ -692,7 +692,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final String name = p.getProperty("name", ""); // the name attribute
final String rel = p.getProperty("rel", ""); // the rel-attribute
final String text = p.getProperty("text", ""); // the text between the <a></a> tag
final String urls = url.toNormalform(false, false);
final String urls = url.toNormalform(false);
final int pr = urls.indexOf("://",0);
inboundlinksURLProtocol.add(urls.substring(0, pr));
inboundlinksURLStub.add(urls.substring(pr + 3));
@ -702,7 +702,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
inboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
inboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
inboundlinksTag.add(
"<a href=\"" + url.toNormalform(false, false) + "\"" +
"<a href=\"" + url.toNormalform(false) + "\"" +
(rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +
(name.length() > 0 ? " name=\"" + name + "\"" : "") +
">" +
@ -740,7 +740,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final String name = p.getProperty("name", ""); // the name attribute
final String rel = p.getProperty("rel", ""); // the rel-attribute
final String text = p.getProperty("text", ""); // the text between the <a></a> tag
final String urls = url.toNormalform(false, false);
final String urls = url.toNormalform(false);
final int pr = urls.indexOf("://",0);
outboundlinksURLProtocol.add(urls.substring(0, pr));
outboundlinksURLStub.add(urls.substring(pr + 3));
@ -750,7 +750,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
outboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
outboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
outboundlinksTag.add(
"<a href=\"" + url.toNormalform(false, false) + "\"" +
"<a href=\"" + url.toNormalform(false) + "\"" +
(rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +
(name.length() > 0 ? " name=\"" + name + "\"" : "") +
">" +
@ -898,7 +898,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
public SolrInputDocument err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
final SolrInputDocument solrdoc = new SolrInputDocument();
add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true));
final InetAddress address = digestURI.getInetAddress();
if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());
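
A side note on the inbound/outbound link loops in this file: each normalized URL is split at the first "://" into a protocol part and a stub part before being added to the per-document lists. The split is a plain index/substring operation:

    public class LinkSplitDemo {
        public static void main(String[] args) {
            String urls = "http://example.com/dir/page.html"; // a normalized link, as in the loops
            int pr = urls.indexOf("://", 0);
            System.out.println(urls.substring(0, pr));  // protocol: http
            System.out.println(urls.substring(pr + 3)); // stub: example.com/dir/page.html
        }
    }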

View File

@ -703,7 +703,7 @@ public final class RWIProcess extends Thread
}
}
final String pageurl = page.url().toNormalform(true, true);
final String pageurl = page.url().toNormalform(true);
final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase();

View File

@ -309,7 +309,7 @@ public class SnippetProcess {
r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
// prefer hit with 'prefer' pattern
if (this.query.prefer.matcher(rentry.url().toNormalform(true, true)).matches()) {
if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) {
r += 256 << this.query.ranking.coeff_prefer;
}
if (this.query.prefer.matcher(rentry.title()).matches()) {
@ -317,7 +317,7 @@ public class SnippetProcess {
}
// apply 'common-sense' heuristic using references
final String urlstring = rentry.url().toNormalform(true, true);
final String urlstring = rentry.url().toNormalform(true);
final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
int tc;
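
The ranking additions above weight each bonus by left-shifting a base value with a per-query coefficient, so every coefficient point doubles the bonus. A worked example with a hypothetical coefficient value:

    public class PreferBoostDemo {
        public static void main(String[] args) {
            long r = 0;
            final int coeff_prefer = 4; // hypothetical value of query.ranking.coeff_prefer
            r += 256L << coeff_prefer;  // bonus for a URL matching the 'prefer' pattern
            System.out.println(r);      // prints: 4096
        }
    }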
@ -491,7 +491,7 @@ public class SnippetProcess {
break; // no more available
}
this.setName(page.url().toNormalform(true, false)); // to support debugging
this.setName(page.url().toNormalform(true)); // to support debugging
if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) {
continue;
}
@ -618,7 +618,7 @@ public class SnippetProcess {
if (this.deleteIfSnippetFail) {
this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.query_include_hashes, reason);
}
log.logInfo("sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
log.logInfo("sorted out url " + page.url().toNormalform(true) + " during search: " + reason);
return null;
}
}

View File

@ -180,7 +180,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
url = new DigestURI(entry.getKey());
desc = entry.getValue();
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
result.add(new MediaSnippet(mediatype, url, Classification.url2mime(url), desc, document.getTextLength(), null, ranking, source));
@ -210,7 +210,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
if (ientry.width() > 0 && ientry.width() < 32) continue;
desc = ientry.alt();
final int appcount = queryhashes.size() * 2 -
removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
removeAppearanceHashes(url.toNormalform(true), queryhashes).size() -
removeAppearanceHashes(desc, queryhashes).size();
final long ranking = Long.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
result.add(new MediaSnippet(ContentDomain.IMAGE, url, Classification.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));
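
Both hunks in this file feed the URL's normal form into removeAppearanceHashes. Assuming that helper returns the query hashes which do not appear in its first argument (consistent with the acceptance test ranking < 2 * queryhashes.size()), a worked example of the first test:

    public class MediaRankDemo {
        public static void main(String[] args) {
            final int querySize = 3;     // |queryhashes|
            final int missingInUrl = 1;  // 2 of 3 query words appear in the URL
            final int missingInDesc = 2; // 1 of 3 appears in the description text
            final int ranking = missingInUrl + missingInDesc;
            System.out.println(ranking < 2 * querySize); // true -> media snippet is kept
        }
    }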

View File

@ -131,10 +131,10 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
return this.urlentry.flags();
}
public String urlstring() {
return (this.alternative_urlstring == null) ? this.urlentry.url().toNormalform(false, true) : this.alternative_urlstring;
return (this.alternative_urlstring == null) ? this.urlentry.url().toNormalform(true) : this.alternative_urlstring;
}
public String urlname() {
return (this.alternative_urlname == null) ? MultiProtocolURI.unescape(this.urlentry.url().toNormalform(false, true)) : this.alternative_urlname;
return (this.alternative_urlname == null) ? MultiProtocolURI.unescape(this.urlentry.url().toNormalform(true)) : this.alternative_urlname;
}
public String title() {
return this.urlentry.dc_title();

View File

@ -27,7 +27,7 @@ public class AugmentedHtmlStream extends FilterOutputStream {
this.buffer = new ByteArrayOutputStream();
this.charset = charset;
this.url = url;
this.urls = this.url.toNormalform(false, true);
this.urls = this.url.toNormalform(false);
this.requestHeader = requestHeader;
}

View File

@ -315,7 +315,7 @@ public final class HTTPDProxyHandler {
//redirector
if (redirectorEnabled){
synchronized(redirectorProcess){
redirectorWriter.println(url.toNormalform(false, true));
redirectorWriter.println(url.toNormalform(true));
redirectorWriter.flush();
}
final String newUrl = redirectorReader.readLine();