Mirror of https://github.com/yacy/yacy_search_server.git
Removed the option to prevent removal of "&amp;" parts inside the MultiProtocolURI during normalform computation, because that should always be done, and should also be done during initialization of the MultiProtocolURI object. The new normalform method takes only one argument, which should be 'true' unless you know exactly what you are doing.
commit 5f0ab25382 (parent 53789555b9)
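The upshot for callers is sketched below; the sample URL and wrapper class are illustrative only, and the package path is assumed from this revision of the source tree:

    import java.net.MalformedURLException;
    import net.yacy.cora.document.MultiProtocolURI;

    public class NormalformExample {
        public static void main(final String[] args) throws MalformedURLException {
            // Old, removed form: callers chose at call time whether HTML-escaped
            // ampersands were stripped during normalform computation:
            //     url.toNormalform(true, true);   // excludeAnchor, stripAmp
            // New form: "&amp;" is always reduced to "&" while the URI is parsed,
            // so only the excludeAnchor flag remains:
            final MultiProtocolURI url = new MultiProtocolURI("http://example.net/x?a=1&amp;b=2#top");
            System.out.println(url.toNormalform(true)); // anchor excluded, query already de-escaped
        }
    }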
@@ -199,7 +199,7 @@ public class Bookmarks {
 if (urlentry != null) try {
 final Document document = Document.mergeDocuments(urlentry.url(), null, sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay));
 prop.put("mode_edit", "0"); // create mode
-prop.put("mode_url", urlentry.url().toNormalform(false, true));
+prop.put("mode_url", urlentry.url().toNormalform(false));
 prop.putHTML("mode_title", urlentry.dc_title());
 prop.putHTML("mode_description", (document == null) ? urlentry.dc_title(): document.dc_title());
 prop.putHTML("mode_author", urlentry.dc_creator());

@@ -65,7 +65,7 @@ public class Collage {
 prop.put("emb", (embed) ? "0" : "1");

 if (nextOrigin != null) {
-System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true, false));
+System.out.println("NEXTORIGIN=" + nextOrigin.imageEntry.url().toNormalform(true));
 if (fifoSize == 0 || origins[fifoPos] != nextOrigin) {
 fifoPos = fifoPos + 1 >= fifoMax ? 0 : fifoPos + 1;
 fifoSize = fifoSize + 1 > fifoMax ? fifoMax : fifoSize + 1;
@@ -99,17 +99,17 @@ public class Collage {

 final long z = imgZIndex[i];
 prop.put("imgurl_list_" + c + "_url",
-"<a href=\"" + baseURL.toNormalform(true, false) + "\">"
-+ "<img src=\"" + imageURL.toNormalform(true, false) + "\" "
+"<a href=\"" + baseURL.toNormalform(true) + "\">"
++ "<img src=\"" + imageURL.toNormalform(true) + "\" "
 + "style=\""
 + ((imgWidth[i] == 0 || imgHeight[i] == 0) ? "" : "width:" + imgWidth[i] + "px;height:" + imgHeight[i] + "px;")
 + "position:absolute;top:" + (imgPosY[i] + yOffset)
 + "px;left:" + imgPosX[i]
 + "px;z-index:" + z + "\" "
 + "id=\"col" + z + "\" "
-+ "alt=\"" + baseURL.toNormalform(true, false) + "\" "
++ "alt=\"" + baseURL.toNormalform(true) + "\" "
 + "onmouseover=\"raise(" + z + ")\" onmouseout=\"lower(" + z + ")\" "
-+ "title=\"" + baseURL.toNormalform(true, false) + "\" />"
++ "title=\"" + baseURL.toNormalform(true) + "\" />"
 + "</a><br />");
 c++;
 }

@@ -78,8 +78,8 @@ public class CrawlCheck_p {
 StringBuilder s = new StringBuilder(300);
 int row = 0;
 for (DigestURI u: rootURLs) {
-s.append(u.toNormalform(true, true)).append('\n');
-prop.put("table_list_" + row + "_url", u.toNormalform(true, true));
+s.append(u.toNormalform(true)).append('\n');
+prop.put("table_list_" + row + "_url", u.toNormalform(true));

 // try to load the robots
 RobotsTxtEntry robotsEntry;
@@ -94,7 +94,7 @@ public class CrawlCheck_p {
 robotsAllowed = !robotsEntry.isDisallowed(u);
 prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
 prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
-prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true, true));
+prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
 }
 } catch (final IOException e) {
 }

@@ -197,7 +197,7 @@ public class CrawlResults {
 urltxt = null;
 continue;
 }
-urlstr = urle.url().toNormalform(false, true);
+urlstr = urle.url().toNormalform(true);
 urltxt = nxTools.shortenURLString(urlstr, 72); // shorten the string text like a URL

 initiatorSeed = entry.getValue() == null || entry.getValue().initiatorHash == null ? null : sb.peers.getConnected(ASCII.String(entry.getValue().initiatorHash));

@@ -195,7 +195,7 @@ public class CrawlStartScanner_p
 if ( url != null ) {
 String path =
 "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
-path += "&crawlingURL=" + url.toNormalform(true, false);
+path += "&crawlingURL=" + url.toNormalform(true);
 WorkTables.execAPICall(
 Domains.LOCALHOST,
 (int) sb.getConfigLong("port", 8090),
@@ -237,7 +237,7 @@ public class CrawlStartScanner_p
 host = se.next();
 try {
 u = new DigestURI(host.getKey().url());
-urlString = u.toNormalform(true, false);
+urlString = u.toNormalform(true);
 if ( host.getValue() == Access.granted
 && Scanner.inIndex(apiCommentCache, urlString) == null ) {
 String path =

@@ -238,10 +238,10 @@ public class Crawler_p {
 // store this call as api call
 if (repeat_time > 0) {
 // store as scheduled api call
-sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true, false)), repeat_time, repeat_unit.substring(3));
+sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true)), repeat_time, repeat_unit.substring(3));
 } else {
 // store just a protocol
-sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true, false)));
+sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + ((rootURLs.size() == 0) ? post.get("crawlingFile", "") : rootURLs.iterator().next().toNormalform(true)));
 }

 final boolean crawlingDomMaxCheck = "on".equals(post.get("crawlingDomMaxCheck", "off"));
@@ -327,7 +327,7 @@ public class Crawler_p {
 try {
 Pattern mmp = Pattern.compile(newcrawlingMustMatch);
 for (DigestURI u: rootURLs) {
-assert mmp.matcher(u.toNormalform(true, true)).matches() : "pattern " + mmp.toString() + " does not match url " + u.toNormalform(true, true);
+assert mmp.matcher(u.toNormalform(true)).matches() : "pattern " + mmp.toString() + " does not match url " + u.toNormalform(true);
 }
 } catch (final PatternSyntaxException e) {
 prop.put("info", "4"); // crawlfilter does not match url
@@ -570,7 +570,7 @@ public class Crawler_p {
 return "scraper cannot load URL: " + e.getMessage();
 }

-final String title = scraper == null ? url.toNormalform(true, true) : scraper.dc_title();
+final String title = scraper == null ? url.toNormalform(true) : scraper.dc_title();
 final String description = scraper.dc_description();

 // add the url to the crawl stack
@@ -606,7 +606,7 @@ public class Crawler_p {
 if (tagStr.length() > 2 && tagStr.startsWith("[") && tagStr.endsWith("]")) tagStr = tagStr.substring(1, tagStr.length() - 2);

 // we will create always a bookmark to use this to track crawled hosts
-final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url.toNormalform(true, false), "admin");
+final BookmarksDB.Bookmark bookmark = sb.bookmarksDB.createBookmark(url.toNormalform(true), "admin");
 if (bookmark != null) {
 bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_TITLE, title);
 bookmark.setProperty(BookmarksDB.Bookmark.BOOKMARK_DESCRIPTION, description);

@@ -487,7 +487,7 @@ public class IndexControlRWIs_p {
 if ( url == null ) {
 continue;
 }
-us = url.toNormalform(false, false);
+us = url.toNormalform(true);
 if ( rn == -1 ) {
 rn = entry.ranking();
 }

@@ -179,7 +179,7 @@ public class IndexControlURLs_p {
 if (entry == null) {
 prop.putHTML("result", "No Entry for URL hash " + urlhash + "; nothing deleted.");
 } else {
-urlstring = entry.url().toNormalform(false, true);
+urlstring = entry.url().toNormalform(true);
 prop.put("urlstring", "");
 sb.urlRemove(segment, urlhash.getBytes());
 prop.putHTML("result", "Removed URL " + urlstring);
@@ -207,7 +207,7 @@ public class IndexControlURLs_p {
 prop.put("urlhash", urlhash);
 final URIMetadata entry = segment.fulltext().getMetadata(ASCII.getBytes(urlhash));
 if (entry == null) {
-prop.putHTML("result", "No Entry for URL " + url.toNormalform(true, true));
+prop.putHTML("result", "No Entry for URL " + url.toNormalform(true));
 prop.putHTML("urlstring", urlstring);
 prop.put("urlhash", "");
 } else {
@@ -225,7 +225,7 @@ public class IndexControlURLs_p {
 if (entry == null) {
 prop.putHTML("result", "No Entry for URL hash " + urlhash);
 } else {
-prop.putHTML("urlstring", entry.url().toNormalform(false, true));
+prop.putHTML("urlstring", entry.url().toNormalform(true));
 prop.putAll(genUrlProfile(segment, entry, urlhash));
 prop.put("statistics", 0);
 }
@@ -354,13 +354,13 @@ public class IndexControlURLs_p {
 return prop;
 }
 prop.put("genUrlProfile", "2");
-prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(false, true));
+prop.putHTML("genUrlProfile_urlNormalform", entry.url().toNormalform(true));
 prop.put("genUrlProfile_urlhash", urlhash);
 prop.put("genUrlProfile_urlDescr", entry.dc_title());
 prop.put("genUrlProfile_moddate", entry.moddate().toString());
 prop.put("genUrlProfile_loaddate", entry.loaddate().toString());
 prop.put("genUrlProfile_referrer", (le == null) ? 0 : 1);
-prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.url().toNormalform(false, true));
+prop.putHTML("genUrlProfile_referrer_url", (le == null) ? "<unknown>" : le.url().toNormalform(true));
 prop.put("genUrlProfile_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
 prop.put("genUrlProfile_doctype", String.valueOf(entry.doctype()));
 prop.put("genUrlProfile_language", entry.language());

@@ -59,7 +59,7 @@ public class IndexCreateLoaderQueue_p {
 prop.putHTML("loader-set_list_"+count+"_initiator", ((initiator == null) ? "proxy" : initiator.getName()));
 prop.put("loader-set_list_"+count+"_depth", element.depth());
 prop.put("loader-set_list_"+count+"_status", element.getStatus());
-prop.putHTML("loader-set_list_"+count+"_url", element.url().toNormalform(true, false));
+prop.putHTML("loader-set_list_"+count+"_url", element.url().toNormalform(true));
 dark = !dark;
 count++;
 }

@@ -89,14 +89,14 @@ public class IndexCreateParserErrors_p {
 executorSeed = (executorHash == null) ? null : sb.peers.getConnected(ASCII.String(executorHash));
 prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
 prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
-prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true));
+prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false));

 String cause = entry.anycause();
 if (cause.startsWith(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER)) {
-prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false, true) +
+prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false) +
 "&regex=" + cause.substring(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER.length()) + "\">test</a>) " + cause);
 } else if (cause.startsWith(CrawlStacker.ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER)) {
-prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false, true) +
+prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false) +
 "&regex=" + cause.substring(CrawlStacker.ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER.length()) + "\">test</a>) " + cause);
 } else {
 prop.putHTML("rejected_list_"+j+"_failreason", cause);

@@ -147,7 +147,7 @@ public class IndexCreateQueues_p {
 prop.put("crawler_host_" + hc + "_list_" + count + "_depth", request.depth());
 prop.put("crawler_host_" + hc + "_list_" + count + "_modified", daydate(request.appdate()) );
 prop.putHTML("crawler_host_" + hc + "_list_" + count + "_anchor", request.name());
-prop.putHTML("crawler_host_" + hc + "_list_" + count + "_url", request.url().toNormalform(false, true));
+prop.putHTML("crawler_host_" + hc + "_list_" + count + "_url", request.url().toNormalform(true));
 prop.put("crawler_host_" + hc + "_list_" + count + "_hash", request.url().hash());
 count++;
 }

@@ -71,7 +71,7 @@ public class IndexImportOAIPMH_p {
 // set next default url
 try {
 final DigestURI nexturl = (rt == null) ? null : rt.resumptionURL();
-if (rt != null) prop.put("defaulturl", (nexturl == null) ? "" : nexturl.toNormalform(true, false));
+if (rt != null) prop.put("defaulturl", (nexturl == null) ? "" : nexturl.toNormalform(true));
 } catch (final MalformedURLException e) {
 prop.put("defaulturl", e.getMessage());
 } catch (final IOException e) {

@@ -203,7 +203,7 @@ public class Load_RSS_p {
 prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
 prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
 prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
-prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true, false));
+prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
 prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
 prop.put("showscheduledfeeds_list_" + apic + "_lastload", DateFormat.getDateTimeInstance().format(row.get("last_load_date", new Date())));
 prop.put("showscheduledfeeds_list_" + apic + "_nextload", date_next_exec == null ? "" : DateFormat.getDateTimeInstance().format(date_next_exec));
@@ -217,7 +217,7 @@ public class Load_RSS_p {
 prop.put("shownewfeeds_list_" + newc + "_count", newc);
 prop.putXML("shownewfeeds_list_" + newc + "_rss", messageurl);
 prop.putXML("shownewfeeds_list_" + newc + "_title", row.get("title", ""));
-prop.putXML("shownewfeeds_list_" + newc + "_referrer", referrer == null ? "" : referrer.toNormalform(true, false));
+prop.putXML("shownewfeeds_list_" + newc + "_referrer", referrer == null ? "" : referrer.toNormalform(true));
 prop.put("shownewfeeds_list_" + newc + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
 newc++;
 }
@@ -256,7 +256,7 @@ public class Load_RSS_p {
 // if we have an url then try to load the rss
 RSSReader rss = null;
 if (url != null) try {
-prop.put("url", url.toNormalform(true, false));
+prop.put("url", url.toNormalform(true));
 final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay);
 final byte[] resource = response == null ? null : response.getContent();
 rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
@@ -322,7 +322,7 @@ public class Load_RSS_p {
 prop.putHTML("showitems_item_" + i + "_state_guid", item.getGuid());
 prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);
 prop.putHTML("showitems_item_" + i + "_title", item.getTitle());
-prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(false, false));
+prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(true));
 prop.putHTML("showitems_item_" + i + "_description", item.getDescription());
 prop.putHTML("showitems_item_" + i + "_language", item.getLanguage());
 prop.putHTML("showitems_item_" + i + "_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
@@ -334,10 +334,10 @@ public class Load_RSS_p {
 }
 prop.put("showitems_item", i);
 prop.put("showitems_num", i);
-prop.putHTML("showitems_rss", url.toNormalform(true, false));
+prop.putHTML("showitems_rss", url.toNormalform(true));
 if (i > 0) {
 prop.put("showload", 1);
-prop.put("showload_rss", url.toNormalform(true, false));
+prop.put("showload_rss", url.toNormalform(true));
 }
 }

@@ -112,7 +112,7 @@ public class QuickCrawlLink_p {

 if (crawlingStart != null) {
 crawlingStart = crawlingStart.trim();
-try {crawlingStart = new DigestURI(crawlingStart).toNormalform(true, true);} catch (final MalformedURLException e1) {}
+try {crawlingStart = new DigestURI(crawlingStart).toNormalform(true);} catch (final MalformedURLException e1) {}

 // check if url is proper
 DigestURI crawlingStartURL = null;
@@ -133,7 +133,7 @@ public class QuickCrawlLink_p {
 CrawlProfile pe = null;
 try {
 pe = new CrawlProfile(
-crawlingStartURL.toNormalform(true, false),
+crawlingStartURL.toNormalform(true),
 crawlingMustMatch, //crawlerUrlMustMatch
 crawlingMustNotMatch, //crawlerUrlMustNotMatch
 CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch

@@ -64,7 +64,7 @@ public class ServerScannerList {
 host = se.next();
 try {
 u = new DigestURI(host.getKey().url());
-urlString = u.toNormalform(true, false);
+urlString = u.toNormalform(true);
 prop.put("servertable_list_" + i + "_edit", edit ? 1 : 0);
 prop.put("servertable_list_" + i + "_edit_pk", ASCII.String(u.hash()));
 prop.put("servertable_list_" + i + "_edit_count", i);

@@ -64,7 +64,7 @@ public class SettingsAck_p {

 // get referer for backlink
 final MultiProtocolURI referer = header.referer();
-prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true, true));
+prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true));
 //if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString());

 if (post == null) {

@@ -157,7 +157,7 @@ public class ViewFile {
 prop.put("url", "");
 return prop;
 }
-prop.put("url", url.toNormalform(false, true));
+prop.put("url", url.toNormalform(true));

 // loading the resource content as byte array
 prop.put("error_incache", Cache.has(url.hash()) ? 1 : 0);
@@ -200,7 +200,7 @@ public class ViewFile {

 } else if (viewMode.equals("iframeWeb")) {
 prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_WEB);
-prop.put("viewMode_url", url.toNormalform(false, true));
+prop.put("viewMode_url", url.toNormalform(true));

 } else if (viewMode.equals("iframeCache")) {
 prop.put("viewMode", VIEW_MODE_AS_IFRAME_FROM_CACHE);
@@ -209,10 +209,10 @@ public class ViewFile {
 prop.put("viewMode_html", 0);
 if (ext.length() > 0 && "jpg.jpeg.png.gif".indexOf(ext) >= 0) {
 prop.put("viewMode_png", 1);
-prop.put("viewMode_png_url", url.toNormalform(false, true));
+prop.put("viewMode_png_url", url.toNormalform(true));
 } else {
 prop.put("viewMode_html", 1);
-prop.put("viewMode_html_url", url.toNormalform(false, true));
+prop.put("viewMode_html_url", url.toNormalform(true));
 }
 } else if (viewMode.equals("parsed") || viewMode.equals("sentences") || viewMode.equals("words") || viewMode.equals("links")) {
 // parsing the resource content
@@ -317,8 +317,8 @@ public class ViewFile {
 prop.put("viewMode_links_" + i + "_dark", dark ? "1" : "0");
 prop.put("viewMode_links_" + i + "_type", "image");
 prop.put("viewMode_links_" + i + "_text", (entry.alt().isEmpty()) ? "&nbsp;" : markup(wordArray, entry.alt()));
-prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(false, true));
-prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(false, true)));
+prop.put("viewMode_links_" + i + "_url", entry.url().toNormalform(true));
+prop.put("viewMode_links_" + i + "_link", markup(wordArray, entry.url().toNormalform(true)));
 if (entry.width() > 0 && entry.height() > 0) {
 prop.put("viewMode_links_" + i + "_rel", entry.width() + "x" + entry.height() + " Pixel");
 } else {
@@ -336,7 +336,7 @@ public class ViewFile {
 if (document != null) document.close();
 }
 prop.put("error", "0");
-prop.put("error_url", url.toNormalform(false, true));
+prop.put("error_url", url.toNormalform(true));
 prop.put("error_hash", urlHash);
 prop.put("error_wordCount", wordCount);
 prop.putHTML("error_desc", (descr.isEmpty()) ? "&nbsp;" : descr);
@@ -447,8 +447,8 @@ public class ViewFile {
 prop.put("viewMode_links_" + c + "_dark", ((dark) ? 1 : 0));
 prop.putHTML("viewMode_links_" + c + "_type", type);
 prop.put("viewMode_links_" + c + "_text", text);
-prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true, false)));
-prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true, false));
+prop.put("viewMode_links_" + c + "_link", markup(wordArray, entry.getKey().toNormalform(true)));
+prop.put("viewMode_links_" + c + "_url", entry.getKey().toNormalform(true));
 prop.put("viewMode_links_" + c + "_rel", rel);
 prop.put("viewMode_links_" + c + "_name", name);
 dark = !dark;

@@ -81,7 +81,7 @@ public class ViewImage {

 if ((url == null) && (urlLicense.length() > 0)) {
 url = sb.licensedURLs.releaseLicense(urlLicense);
-urlString = (url == null) ? null : url.toNormalform(true, true);
+urlString = (url == null) ? null : url.toNormalform(true);
 }

 if (urlString == null) return null;

@@ -74,7 +74,7 @@ public class Vocabulary_p {
 String t;
 while (ui.hasNext()) {
 DigestURI u = ui.next();
-String u0 = u.toNormalform(true, false);
+String u0 = u.toNormalform(true);
 t = "";
 if (discoverFromPath) {
 t = u0.substring(discoverobjectspace.length());
@@ -129,7 +129,7 @@ public class Vocabulary_p {
 if (post.get("add_new", "").equals("checked") && post.get("newterm", "").length() > 0) {
 String objectlink = post.get("newobjectlink", "");
 if (objectlink.length() > 0) try {
-objectlink = new MultiProtocolURI(objectlink).toNormalform(true, false);
+objectlink = new MultiProtocolURI(objectlink).toNormalform(true);
 } catch (MalformedURLException e) {}
 vocabulary.put(post.get("newterm", ""), post.get("newsynonyms", ""), objectlink);
 }

@@ -133,9 +133,9 @@ public class getpageinfo {
 count = 0;
 for (final MultiProtocolURI uri: uris) {
 if (uri == null) continue;
-links.append(';').append(uri.toNormalform(true, false));
+links.append(';').append(uri.toNormalform(true));
 filter.append('|').append(uri.getProtocol()).append("://").append(uri.getHost()).append(".*");
-prop.putXML("links_" + count + "_link", uri.toNormalform(true, false));
+prop.putXML("links_" + count + "_link", uri.toNormalform(true));
 count++;
 }
 prop.put("links", count);

@@ -133,9 +133,9 @@ public class getpageinfo_p {
 count = 0;
 for (final MultiProtocolURI uri: uris) {
 if (uri == null) continue;
-links.append(';').append(uri.toNormalform(true, false));
+links.append(';').append(uri.toNormalform(true));
 filter.append('|').append(uri.getProtocol()).append("://").append(uri.getHost()).append(".*");
-prop.putXML("links_" + count + "_link", uri.toNormalform(true, false));
+prop.putXML("links_" + count + "_link", uri.toNormalform(true));
 count++;
 }
 prop.put("links", count);

@@ -109,13 +109,13 @@ public class webstructure {
 prop.put("references_documents_0_count", scraper.inboundLinks().size() + scraper.outboundLinks().size());
 prop.put("references_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date()));
 prop.put("references_documents_0_urle", url == null ? 0 : 1);
-if (url != null) prop.putXML("references_documents_0_urle_url", url.toNormalform(true, false));
+if (url != null) prop.putXML("references_documents_0_urle_url", url.toNormalform(true));
 int d = 0;
 Iterator<MultiProtocolURI> i = scraper.inboundLinks().iterator();
 while (i.hasNext()) {
 DigestURI refurl = new DigestURI(i.next());
 byte[] refhash = refurl.hash();
-prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
+prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true));
 prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
 prop.put("references_documents_0_anchors_" + d + "_outbound", 0);
 d++;
@@ -124,7 +124,7 @@ public class webstructure {
 while (i.hasNext()) {
 DigestURI refurl = new DigestURI(i.next());
 byte[] refhash = refurl.hash();
-prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true, false));
+prop.putXML("references_documents_0_anchors_" + d + "_url", refurl.toNormalform(true));
 prop.put("references_documents_0_anchors_" + d + "_hash", refhash);
 prop.put("references_documents_0_anchors_" + d + "_outbound", 1);
 d++;
@@ -152,7 +152,7 @@ public class webstructure {
 prop.put("citations_documents_0_count", citations.size());
 prop.put("citations_documents_0_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(citations.lastWrote())));
 prop.put("citations_documents_0_urle", url == null ? 0 : 1);
-if (url != null) prop.putXML("citations_documents_0_urle_url", url.toNormalform(true, false));
+if (url != null) prop.putXML("citations_documents_0_urle_url", url.toNormalform(true));
 int d = 0;
 Iterator<CitationReference> i = citations.entries();
 while (i.hasNext()) {
@@ -160,7 +160,7 @@ public class webstructure {
 byte[] refhash = cr.urlhash();
 DigestURI refurl = authenticated ? sb.getURL(refhash) : null;
 prop.put("citations_documents_0_anchors_" + d + "_urle", refurl == null ? 0 : 1);
-if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true, false));
+if (refurl != null) prop.putXML("citations_documents_0_anchors_" + d + "_urle_url", refurl.toNormalform(true));
 prop.put("citations_documents_0_anchors_" + d + "_urle_hash", refhash);
 prop.put("citations_documents_0_anchors_" + d + "_urle_date", GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(cr.lastModified())));
 d++;

@@ -115,7 +115,7 @@ public class yacydoc {
 prop.putXML("dc_contributor", "");
 prop.putXML("dc_date", ISO8601Formatter.FORMATTER.format(entry.moddate()));
 prop.putXML("dc_type", String.valueOf(entry.doctype()));
-prop.putXML("dc_identifier", entry.url().toNormalform(false, true));
+prop.putXML("dc_identifier", entry.url().toNormalform(true));
 prop.putXML("dc_language", ASCII.String(entry.language()));
 prop.putXML("collection", Arrays.toString(entry.collections()));
 prop.put("geo_lat", entry.lat());
@@ -124,7 +124,7 @@ public class yacydoc {
 prop.put("yacy_urlhash", entry.url().hash());
 prop.putXML("yacy_loaddate", entry.loaddate().toString());
 prop.putXML("yacy_referrer_hash", (le == null) ? "" : ASCII.String(le.hash()));
-prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(false, true));
+prop.putXML("yacy_referrer_url", (le == null) ? "" : le.url().toNormalform(true));
 prop.put("yacy_size", entry.size());
 prop.put("yacy_words", entry.wordCount());
 prop.put("yacy_citations", sb.index.urlCitation().count(entry.hash()));

@@ -51,7 +51,7 @@ public class cytag {
 StringBuilder connect = new StringBuilder();
 connect.append('{');
 appendJSON(connect, "time", GenericFormatter.SHORT_MILSEC_FORMATTER.format());
-appendJSON(connect, "trail", (referer == null) ? "" : referer.toNormalform(false, false));
+appendJSON(connect, "trail", (referer == null) ? "" : referer.toNormalform(false));
 appendJSON(connect, "nick", (post == null) ? "" : post.get("nick", ""));
 appendJSON(connect, "tag", (post == null) ? "" : post.get("tag", ""));
 appendJSON(connect, "icon", (post == null) ? "" : post.get("icon", ""));

@@ -102,7 +102,7 @@ public class rct_p {
 * @return
 */
 private static String urlToString(final DigestURI url) {
-return (url == null ? "null" : url.toNormalform(true, false));
+return (url == null ? "null" : url.toNormalform(true));
 }

 private static void listHosts(final Switchboard sb, final serverObjects prop) {

@@ -140,7 +140,7 @@ public final class crawlReceipt {
 // Check URL against DHT blacklist
 if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
 // URL is blacklisted
-log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam);
+log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false) + " from peer " + iam);
 prop.put("delay", "9999");
 return prop;
 }
@@ -150,7 +150,7 @@ public final class crawlReceipt {
 sb.index.fulltext().putMetadata(entry);
 ResultURLs.stack(entry, youare.getBytes(), iam.getBytes(), EventOrigin.REMOTE_RECEIPTS);
 sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work has been done
-if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true));
+if (log.isInfo()) log.logInfo("crawlReceipt: RECEIVED RECEIPT from " + otherPeerName + " for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false));

 // ready for more
 prop.put("delay", "10");

@@ -123,7 +123,7 @@ public final class transferURL {

 // check if the entry is blacklisted
 if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
-if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
+if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
 lEntry = null;
 blocked++;
 continue;
@@ -147,11 +147,11 @@ public final class transferURL {
 }

 // write entry to database
-if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true, false));
+if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true));
 try {
 sb.index.fulltext().putMetadata(lEntry);
 ResultURLs.stack(lEntry, iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
-if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
+if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
 received++;
 } catch (final IOException e) {
 Log.logException(e);

@@ -91,8 +91,8 @@ public class urls {

 // create RSS entry
 prop.put("item_" + c + "_title", "");
-prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
-prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
+prop.putXML("item_" + c + "_link", entry.url().toNormalform(true));
+prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true));
 prop.putXML("item_" + c + "_description", entry.name());
 prop.put("item_" + c + "_author", "");
 prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.appdate()));
@@ -119,8 +119,8 @@ public class urls {
 referrer = sb.getURL(entry.referrerHash());
 // create RSS entry
 prop.put("item_" + c + "_title", entry.dc_title());
-prop.putXML("item_" + c + "_link", entry.url().toNormalform(true, false));
-prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true, false));
+prop.putXML("item_" + c + "_link", entry.url().toNormalform(true));
+prop.putXML("item_" + c + "_referrer", (referrer == null) ? "" : referrer.toNormalform(true));
 prop.putXML("item_" + c + "_description", entry.dc_title());
 prop.put("item_" + c + "_author", entry.dc_creator());
 prop.put("item_" + c + "_pubDate", GenericFormatter.SHORT_SECOND_FORMATTER.format(entry.moddate()));

@@ -668,7 +668,7 @@ public class yacysearch {
 if ( documents != null ) {
 // create a news message
 final Map<String, String> map = new HashMap<String, String>();
-map.put("url", urlentry.url().toNormalform(false, true).replace(',', '|'));
+map.put("url", urlentry.url().toNormalform(true).replace(',', '|'));
 map.put("title", urlentry.dc_title().replace(',', ' '));
 map.put("description", documents[0].dc_title().replace(',', ' '));
 map.put("author", documents[0].dc_creator());

@@ -262,7 +262,7 @@ public class yacysearchitem {
 if (ms == null) {
 prop.put("content_item", "0");
 } else {
-final String resultUrlstring = ms.url().toNormalform(true, false);
+final String resultUrlstring = ms.url().toNormalform(true);
 final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");

 final String license = sb.licensedURLs.aquireLicense(ms.url());
@@ -278,8 +278,8 @@ public class yacysearchitem {
 prop.put("content_item_height", 0);
 prop.put("content_item_attr", ""/*(ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"*/); // attributes, here: original size of image
 prop.put("content_item_urlhash", ASCII.String(ms.url().hash()));
-prop.put("content_item_source", ms.url().toNormalform(true, false));
-prop.putXML("content_item_source-xml", ms.url().toNormalform(true, false));
+prop.put("content_item_source", ms.url().toNormalform(true));
+prop.putXML("content_item_source-xml", ms.url().toNormalform(true));
 prop.put("content_item_sourcedom", ms.url().getHost());
 prop.put("content_item_nl", (item == theQuery.offset) ? 0 : 1);
 prop.put("content_item", 1);
@@ -299,7 +299,7 @@ public class yacysearchitem {
 if (ms == null) {
 prop.put("content_item", "0");
 } else {
-final String resultUrlstring = ms.url().toNormalform(true, false);
+final String resultUrlstring = ms.url().toNormalform(true);
 final String target = sb.getConfig(resultUrlstring.matches(target_special_pattern) ? SwitchboardConstants.SEARCH_TARGET_SPECIAL : SwitchboardConstants.SEARCH_TARGET_DEFAULT, "_self");
 prop.putHTML("content_item_href", resultUrlstring);
 prop.putHTML("content_item_hrefshort", nxTools.shortenURLString(resultUrlstring, MAX_URL_LENGTH));

@@ -61,6 +61,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU

 public static final MultiProtocolURI POISON = new MultiProtocolURI(); // poison pill for concurrent link generators

+private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
 private static final long serialVersionUID = -1173233022912141884L;
 private static final long SMB_TIMEOUT = 5000;

@@ -628,6 +629,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 this.searchpart = null;
 } else {
 this.searchpart = this.path.substring(r + 1);
+// strip &amp;
+Matcher matcher = ampPattern.matcher(this.searchpart);
+while (matcher.find()) {
+this.searchpart = matcher.replaceAll("&");
+matcher.reset(this.searchpart);
+}
 this.path = this.path.substring(0, r);
 }
 }
@@ -808,11 +815,11 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU

 @Override
 public String toString() {
-return toNormalform(false, true);
+return toNormalform(false);
 }

 public String toTokens() {
-return toTokens(unescape(this.toNormalform(true, true)));
+return toTokens(unescape(this.toNormalform(true)));
 }

 /**
@@ -881,25 +888,11 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 return CharType.high;
 }

-public String toNormalform(final boolean excludeAnchor, final boolean stripAmp) {
-return toNormalform(excludeAnchor, stripAmp, false);
+public String toNormalform(final boolean excludeAnchor) {
+return toNormalform(excludeAnchor, false);
 }

-private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
-
-public String toNormalform(final boolean excludeAnchor, final boolean stripAmp, final boolean removeSessionID) {
-String result = toNormalform0(excludeAnchor, removeSessionID);
-if (stripAmp) {
-Matcher matcher = ampPattern.matcher(result);
-while (matcher.find()) {
-result = matcher.replaceAll("&");
-matcher.reset(result);
-}
-}
-return result;
-}
-
-private String toNormalform0(final boolean excludeAnchor, final boolean removeSessionID) {
+public String toNormalform(final boolean excludeAnchor, final boolean removeSessionID) {
 // generates a normal form of the URL
 boolean defaultPort = false;
 if (this.protocol.equals("mailto")) {
@@ -915,7 +908,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 } else if (isFile()) {
 defaultPort = true;
 }
-final String urlPath = this.getFile(excludeAnchor, removeSessionID);
+String urlPath = this.getFile(excludeAnchor, removeSessionID);
 String h = getHost();
 final StringBuilder u = new StringBuilder(20 + urlPath.length() + ((h == null) ? 0 : h.length()));
 u.append(this.protocol);
@@ -932,12 +925,14 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 u.append(this.port);
 }
 u.append(urlPath);
-return u.toString();
+String result = u.toString();
+
+return result;
 }

 @Override
 public int hashCode() {
-return this.toNormalform(true, true).hashCode();
+return this.toNormalform(true).hashCode();
 }

 /* (non-Javadoc)
@@ -967,7 +962,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 if (this.userInfo != null && h.userInfo != null && (c = this.userInfo.compareTo(h.userInfo)) != 0) return c;
 if (this.path != null && h.path != null && (c = this.path.compareTo(h.path)) != 0) return c;
 if (this.searchpart != null && h.searchpart != null && (c = this.searchpart.compareTo(h.searchpart)) != 0) return c;
-return toNormalform(true, true).compareTo(h.toNormalform(true, true));
+return toNormalform(true).compareTo(h.toNormalform(true));
 }

 public boolean isPOST() {
@@ -1895,7 +1890,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 */
 public java.net.URL getURL() throws MalformedURLException {
 if (!(isHTTP() || isHTTPS() || isFTP())) throw new MalformedURLException();
-return new java.net.URL(this.toNormalform(false, true));
+return new java.net.URL(this.toNormalform(false));
 }

 /**
@@ -1904,7 +1899,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 */
 public java.io.File getFSFile() throws MalformedURLException {
 if (!isFile()) throw new MalformedURLException();
-return new java.io.File(this.toNormalform(false, true).substring(7));
+return new java.io.File(this.toNormalform(true).substring(7));
 }

 /**
@@ -1914,7 +1909,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
 */
 public SmbFile getSmbFile() throws MalformedURLException {
 if (!isSMB()) throw new MalformedURLException();
-final String url = unescape(this.toNormalform(false, true));
+final String url = unescape(this.toNormalform(true));
 return new SmbFile(url);
 }

@@ -2188,8 +2183,8 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU

 // check stability: the normalform of the normalform must be equal to the normalform
 if (aURL != null) try {
-aURL1 = new MultiProtocolURI(aURL.toNormalform(false, true));
-if (!(aURL1.toNormalform(false, true).equals(aURL.toNormalform(false, true)))) {
+aURL1 = new MultiProtocolURI(aURL.toNormalform(false));
+if (!(aURL1.toNormalform(false).equals(aURL.toNormalform(false)))) {
 System.out.println("no stability for url:");
 System.out.println("aURL0=" + aURL.toString());
 System.out.println("aURL1=" + aURL1.toString());

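Aside on the final hunk above: the kept self-test asserts that normalization is idempotent, i.e. parsing a normalized URL and normalizing again must yield the identical string. A minimal sketch of that invariant, with an illustrative sample URL, inside any method that may throw MalformedURLException:

    // Stability invariant mirrored from the "check stability" test:
    // norm(parse(norm(u))) must equal norm(u) for any parseable URL u.
    final MultiProtocolURI a = new MultiProtocolURI("http://example.net/a/b?x=1&amp;y=2");
    final MultiProtocolURI b = new MultiProtocolURI(a.toNormalform(false));
    if (!b.toNormalform(false).equals(a.toNormalform(false))) {
        System.out.println("no stability for url: " + a.toString());
    }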
@@ -56,7 +56,7 @@ public class RSSFeed implements Iterable<RSSMessage> {
 String u;
 RSSMessage message;
 for (MultiProtocolURI uri: links) {
-u = uri.toNormalform(true, false);
+u = uri.toNormalform(true);
 message = new RSSMessage(u, "", u);
 message.setAuthor(source);
 this.addMessage(message);

@@ -113,7 +113,7 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
 this.map = new HashMap<String, String>();
 this.map.put("title", title);
 this.map.put("description", description);
-this.map.put("link", link.toNormalform(true, false));
+this.map.put("link", link.toNormalform(true));
 this.map.put("pubDate", ISO8601Formatter.FORMATTER.format());
 this.map.put("guid", guid);
 }

@@ -104,7 +104,7 @@ public class Scanner extends Thread {
 @Override
 public String toString() {
 try {
-return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true, false);
+return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true);
 } catch (final MalformedURLException e) {
 return "";
 }

@@ -341,7 +341,7 @@ public class HTTPClient {
 */
 public byte[] GETbytes(final MultiProtocolURI url, final int maxBytes) throws IOException {
 final boolean localhost = Domains.isLocalhost(url.getHost());
-final String urix = url.toNormalform(true, false);
+final String urix = url.toNormalform(true);
 final HttpGet httpGet = new HttpGet(urix);
 if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
 return getContentBytes(httpGet, maxBytes);
@@ -358,7 +358,7 @@ public class HTTPClient {
 public void GET(final String uri) throws IOException {
 if (this.currentRequest != null) throw new IOException("Client is in use!");
 final MultiProtocolURI url = new MultiProtocolURI(uri);
-final HttpGet httpGet = new HttpGet(url.toNormalform(true, false));
+final HttpGet httpGet = new HttpGet(url.toNormalform(true));
 setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
 this.currentRequest = httpGet;
 execute(httpGet);
@@ -373,7 +373,7 @@ public class HTTPClient {
 */
 public HttpResponse HEADResponse(final String uri) throws IOException {
 final MultiProtocolURI url = new MultiProtocolURI(uri);
-final HttpHead httpHead = new HttpHead(url.toNormalform(true, false));
+final HttpHead httpHead = new HttpHead(url.toNormalform(true));
 setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
 execute(httpHead);
 finish();
@@ -394,7 +394,7 @@ public class HTTPClient {
 public void POST(final String uri, final InputStream instream, final long length) throws IOException {
 if (this.currentRequest != null) throw new IOException("Client is in use!");
 final MultiProtocolURI url = new MultiProtocolURI(uri);
-final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+final HttpPost httpPost = new HttpPost(url.toNormalform(true));
 String host = url.getHost();
 if (host == null) host = Domains.LOCALHOST;
 setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
@@ -430,7 +430,7 @@ public class HTTPClient {
 * @throws IOException
 */
 public byte[] POSTbytes(final MultiProtocolURI url, final String vhost, final Map<String, ContentBody> post, final boolean usegzip) throws IOException {
-final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+final HttpPost httpPost = new HttpPost(url.toNormalform(true));

 setHost(vhost); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
 if (vhost == null) setHost(Domains.LOCALHOST);
@@ -461,7 +461,7 @@ public class HTTPClient {
 */
 public byte[] POSTbytes(final String uri, final InputStream instream, final long length) throws IOException {
 final MultiProtocolURI url = new MultiProtocolURI(uri);
-final HttpPost httpPost = new HttpPost(url.toNormalform(true, false));
+final HttpPost httpPost = new HttpPost(url.toNormalform(true));
 String host = url.getHost();
 if (host == null) host = Domains.LOCALHOST;
 setHost(host); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service

@@ -209,7 +209,7 @@ public final class CrawlStacker {
 if (replace) {
 this.indexSegment.fulltext().remove(urlhash);
 this.nextQueue.urlRemove(urlhash);
-String u = url.toNormalform(true, true);
+String u = url.toNormalform(true);
 if (u.endsWith("/")) {
 u = u + "index.html";
 } else if (!u.contains(".")) {
@@ -393,7 +393,7 @@ public final class CrawlStacker {
 } else if (remote) {
 warning = this.nextQueue.noticeURL.push(NoticedURL.StackType.REMOTE, entry);
 }
-if (warning != null) this.log.logWarning("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true, false) + " - not pushed: " + warning);
+if (warning != null) this.log.logWarning("CrawlStacker.stackCrawl of URL " + entry.url().toNormalform(true) + " - not pushed: " + warning);

 return null;
 }

@@ -208,14 +208,14 @@ public final class Cache {
 // store the response header into the header database
 final HashMap<String, String> hm = new HashMap<String, String>();
 hm.putAll(responseHeader);
-hm.put("@@URL", url.toNormalform(true, false));
+hm.put("@@URL", url.toNormalform(true));
 try {
 responseHeaderDB.insert(url.hash(), hm);
 } catch (final Exception e) {
 fileDB.delete(url.hash());
 throw new IOException("Cache.store: cannot write to headerDB: " + e.getMessage());
 }
-if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true, false));
+if (log.isFine()) log.logFine("stored in cache: " + url.toNormalform(true));
 }

 /**

@@ -536,7 +536,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
 }

 public static String mustMatchSubpath(final MultiProtocolURI uri) {
-String u = uri.toNormalform(true, true);
+String u = uri.toNormalform(true);
 if (!u.endsWith("/")) {int p = u.lastIndexOf("/"); if (p > 0) u = u.substring(0, p + 1);}
 return new StringBuilder(u.length() + 5).append(Pattern.quote(u)).append(".*").toString();
 }

@@ -278,7 +278,7 @@ public class CrawlQueues {
 }
 try {
 this.sb.indexingDocumentProcessor.enQueue(new IndexingQueueEntry(new Response(urlEntry, profile), null, null));
-Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
+Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true));
 } catch (final InterruptedException e) {
 Log.logException(e);
 }
@@ -545,7 +545,7 @@ public class CrawlQueues {
 * @return
 */
 private static String urlToString(final DigestURI url) {
-return (url == null ? "null" : url.toNormalform(true, false));
+return (url == null ? "null" : url.toNormalform(true));
 }

 public int limitCrawlJobSize() {

@@ -65,7 +65,7 @@ public class ResultImages {
 for (final ImageEntry image: images.values()) {
 // do a double-check; attention: this can be time-consuming since this possibly needs a DNS-lookup
 if (image == null || image.url() == null) continue;
-String url = image.url().toNormalform(true, false);
+String url = image.url().toNormalform(true);
 if (doubleCheck.contains(url)) continue;
 doubleCheck.add(url);

@@ -170,14 +170,14 @@ public class ZURL implements Iterable<ZURL.Entry> {
 final Entry entry = new Entry(bentry, executor, workdate, workcount, reason);
 put(entry);
 this.stack.add(entry.hash());
-if (!reason.startsWith("double")) log.logInfo(bentry.url().toNormalform(false, false) + " - " + reason);
+if (!reason.startsWith("double")) log.logInfo(bentry.url().toNormalform(true) + " - " + reason);
 if (this.solrConnector != null && failCategory.store) {
 // send the error to solr
 try {
 SolrInputDocument errorDoc = this.solrConfiguration.err(bentry.url(), failCategory.name() + " " + reason, httpcode);
 this.solrConnector.add(errorDoc);
 } catch (final IOException e) {
-Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
+Log.logWarning("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());
 }
 }
 while (this.stack.size() > maxStackSize) this.stack.poll();

@@ -117,7 +117,7 @@ public class FTPLoader {
 final RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
 final DigestURI u = this.sb.getURL(request.referrerhash());
-if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true, false));
+if (u != null) requestHeader.put(RequestHeader.REFERER, u.toNormalform(true));
 }

 final StringBuilder dirList = ftpClient.dirhtml(path);
@@ -224,7 +224,7 @@ public class FTPLoader {
 final RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
 final DigestURI refurl = this.sb.getURL(request.referrerhash());
-if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
+if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true));
 }
 final ResponseHeader responseHeader = new ResponseHeader(200);
 responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(fileDate));

@@ -63,14 +63,14 @@ public class FileLoader {
 RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
 DigestURI ur = this.sb.getURL(request.referrerhash());
-if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
+if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true));
 }

 // process directories: transform them to html with meta robots=noindex (using the ftpc lib)
 String[] l = null;
 try {l = url.list();} catch (IOException e) {}
 if (l != null) {
-String u = url.toNormalform(true, true);
+String u = url.toNormalform(true);
 List<String> list = new ArrayList<String>();
 for (String s: l) {
 list.add(u + ((u.endsWith("/") || u.endsWith("\\")) ? "" : "/") + s);

@@ -119,7 +119,7 @@ public final class HTTPLoader {
 requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
 DigestURI refererURL = null;
 if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
-if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
+if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
 requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
 requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
 requestHeader.put(HeaderFramework.ACCEPT_CHARSET, this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));

@@ -135,7 +135,7 @@ public final class HTTPLoader {
 final byte[] responseBody = client.GETbytes(url, maxFileSize);
 final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
 final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
-String requestURLString = request.url().toNormalform(false, false);
+String requestURLString = request.url().toNormalform(true);

 // check redirection
 if (statusCode > 299 && statusCode < 310) {

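The redirect check in the hunk above uses a numeric window rather than listing individual status codes. A one-method illustration of what that window accepts:

    // statusCode > 299 && statusCode < 310 accepts exactly 300..309,
    // i.e. the HTTP 3xx redirect range handled by the branch above.
    static boolean isRedirect(final int statusCode) {
        return statusCode > 299 && statusCode < 310;
    }
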
@@ -70,11 +70,11 @@ public class RSSLoader extends Thread {
 Log.logWarning("Load_RSS", "rss loading for url '" + getName().substring(9) + "' failed: " + e.getMessage());
 return;
 } catch (final IOException e) {
-Log.logWarning("Load_RSS", "rss loading for url '" + this.urlf.toNormalform(true, false) + "' failed: " + e.getMessage());
+Log.logWarning("Load_RSS", "rss loading for url '" + this.urlf.toNormalform(true) + "' failed: " + e.getMessage());
 return;
 }
 if (rss == null) {
-Log.logWarning("Load_RSS", "no rss for url " + this.urlf.toNormalform(true, false));
+Log.logWarning("Load_RSS", "no rss for url " + this.urlf.toNormalform(true));
 return;
 }
 final RSSFeed feed = rss.getFeed();

@@ -111,7 +111,7 @@ public class RSSLoader extends Thread {
 final int lastAvg = rssRow.get("avg_upd_per_day", 0);
 final long thisAvg = 1000 * 60 * 60 * 24 / deltaTime * loadCount;
 final long nextAvg = lastAvg == 0 ? thisAvg : (thisAvg + lastAvg * 2) / 3;
-rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
+rssRow.put("url", UTF8.getBytes(url.toNormalform(true)));
 rssRow.put("title", feed.getChannel().getTitle());
 rssRow.put("last_load_date", new Date());
 rssRow.put("last_load_count", loadCount);

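The hunk above maintains a smoothed update rate per feed: thisAvg extrapolates the observed loads to a per-day rate, and nextAvg blends it with the stored average at a 1:2 weight. A worked example with assumed input values:

    // Worked example of the averaging above (input values assumed):
    long deltaTime = 6L * 60 * 60 * 1000;  // 6 hours since the last load
    int loadCount = 12;                    // items loaded in that window
    long thisAvg = 1000L * 60 * 60 * 24 / deltaTime * loadCount; // 4 * 12 = 48 per day
    int lastAvg = 30;                      // previously stored avg_upd_per_day
    long nextAvg = lastAvg == 0 ? thisAvg : (thisAvg + lastAvg * 2) / 3; // (48 + 60) / 3 = 36
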
@@ -130,20 +130,20 @@ public class RSSLoader extends Thread {
 // record API action
 byte[] pk = null;
 final serverObjects post = new serverObjects();
-post.put("url", url.toNormalform(true, false));
+post.put("url", url.toNormalform(true));
 post.put("indexAllItemContent", "");
 if (apicall_pk != null) post.put(WorkTables.TABLE_API_COL_APICALL_PK, apicall_pk);
 if (repeat_time > 0) {
 // store as scheduled api call
-pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false), repeat_time, repeat_unit.substring(3));
+pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true), repeat_time, repeat_unit.substring(3));
 } else {
 // store just a protocol
-pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false));
+pk = sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true));
 }
 // store pk of api table into rss table to show that the entry has been recorded
 assert pk != null;
 final Tables.Data rssRow = new Tables.Data();
-rssRow.put("url", UTF8.getBytes(url.toNormalform(true, false)));
+rssRow.put("url", UTF8.getBytes(url.toNormalform(true)));
 rssRow.put("title", feed.getChannel().getTitle());
 rssRow.put("api_pk", pk);
 try {

@@ -75,14 +75,14 @@ public class SMBLoader {
 RequestHeader requestHeader = new RequestHeader();
 if (request.referrerhash() != null) {
 DigestURI ur = this.sb.getURL(request.referrerhash());
-if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
+if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true));
 }

 // process directories: transform them to html with meta robots=noindex (using the ftpc lib)
 String[] l = null;
 try {l = url.list();} catch (IOException e) {}
 if (l != null) {
-String u = url.toNormalform(true, true);
+String u = url.toNormalform(true);
 List<String> list = new ArrayList<String>();
 for (String s: l) {
 if (s.startsWith(".")) continue;

@@ -307,7 +307,7 @@ public class RobotsTxt {
 reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());

 // adding referer
-reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
+reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true));
 reqHeaders.put(HeaderFramework.ACCEPT, HTTPLoader.DEFAULT_ACCEPT);
 if (entry != null) {
 oldEtag = entry.getETag();

@@ -496,7 +496,7 @@ public class BookmarksDB {
 public Bookmark(final DigestURI url) {
 this.entry = new HashMap<String, String>();
 this.urlHash = ASCII.String(url.hash());
-this.entry.put(BOOKMARK_URL, url.toNormalform(false, true));
+this.entry.put(BOOKMARK_URL, url.toNormalform(false));
 this.tagNames = new HashSet<String>();
 this.timestamp = System.currentTimeMillis();
 final Bookmark oldBm=getBookmark(this.urlHash);

@@ -313,7 +313,7 @@ public class WorkTables extends Tables {
 // create and insert new entry
 Data data = new Data();
 byte[] date = UTF8.getBytes(GenericFormatter.SHORT_MILSEC_FORMATTER.format());
-data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true, false));
+data.put(TABLE_SEARCH_FAILURE_COL_URL, url.toNormalform(true));
 data.put(TABLE_SEARCH_FAILURE_COL_DATE, date);
 data.put(TABLE_SEARCH_FAILURE_COL_WORDS, queryHashes.export());
 data.put(TABLE_SEARCH_FAILURE_COL_COMMENT, UTF8.getBytes(reason));

@@ -173,7 +173,7 @@ public class YMarkCrawlStart extends HashMap<String,String>{
 final int depth,
 final boolean crawlingQ, final boolean medialink) {
 final CrawlProfile pe = new CrawlProfile(
-(startURL.getHost() == null) ? startURL.toNormalform(true, false) : startURL.getHost(),
+(startURL.getHost() == null) ? startURL.toNormalform(true) : startURL.getHost(),
 urlMustMatch,
 urlMustNotMatch,
 CrawlProfile.MATCH_ALL_STRING,

@@ -372,7 +372,7 @@ public class YMarkTables {
 final YMarkMetadata meta = new YMarkMetadata(url);
 final Document document = meta.loadDocument(loader);
 final EnumMap<YMarkMetadata.METADATA, String> metadata = meta.loadMetadata();
-final String urls = url.toNormalform(true, false);
+final String urls = url.toNormalform(true);
 bmk_entry.put(YMarkEntry.BOOKMARK.URL.key(), urls);
 if(!this.worktables.has(YMarkTables.TABLES.BOOKMARKS.tablename(bmk_user), YMarkUtil.getBookmarkId(urls))) {
 bmk_entry.put(YMarkEntry.BOOKMARK.PUBLIC.key(), "false");

@@ -159,7 +159,7 @@ public final class Condenser {
 Iterator<Map.Entry<MultiProtocolURI, String>> i = document.getAudiolinks().entrySet().iterator();
 while (i.hasNext()) {
 entry = i.next();
-insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasaudio, this.RESULT_FLAGS, false, meaningLib);
+insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasaudio, this.RESULT_FLAGS, false, meaningLib);
 insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasaudio, this.RESULT_FLAGS, true, meaningLib);
 }

@@ -167,7 +167,7 @@ public final class Condenser {
 i = document.getVideolinks().entrySet().iterator();
 while (i.hasNext()) {
 entry = i.next();
-insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasvideo, this.RESULT_FLAGS, false, meaningLib);
+insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasvideo, this.RESULT_FLAGS, false, meaningLib);
 insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasvideo, this.RESULT_FLAGS, true, meaningLib);
 }

@@ -175,7 +175,7 @@ public final class Condenser {
 i = document.getApplinks().entrySet().iterator();
 while (i.hasNext()) {
 entry = i.next();
-insertTextToWords(new SentenceReader(entry.getKey().toNormalform(false, false)), 99, flag_cat_hasapp, this.RESULT_FLAGS, false, meaningLib);
+insertTextToWords(new SentenceReader(entry.getKey().toNormalform(true)), 99, flag_cat_hasapp, this.RESULT_FLAGS, false, meaningLib);
 insertTextToWords(new SentenceReader(entry.getValue()), 99, flag_cat_hasapp, this.RESULT_FLAGS, true, meaningLib);
 }

@@ -187,7 +187,7 @@ public final class Condenser {
 ientry = j.next();
 url = ientry.url();
 if (url == null) continue;
-insertTextToWords(new SentenceReader(url.toNormalform(false, false)), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
+insertTextToWords(new SentenceReader(url.toNormalform(true)), 99, flag_cat_hasimage, this.RESULT_FLAGS, false, meaningLib);
 insertTextToWords(new SentenceReader(ientry.alt()), 99, flag_cat_hasimage, this.RESULT_FLAGS, true, meaningLib);
 }

@@ -247,7 +247,7 @@ dc_rights
 }
 // put to triplestore
 JenaTripleStore.addTriple(subject, vocabulary.getPredicate(), sb.substring(1));
-JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true, false));
+JenaTripleStore.addTriple(subject, Owl.SameAs.getPredicate(), this.source.toNormalform(true));
 }
 }

@@ -290,7 +290,7 @@ dc_rights
 }

 public String dc_identifier() {
-return this.source.toNormalform(true, false);
+return this.source.toNormalform(true);
 }

 public MultiProtocolURI dc_source() {

@@ -482,7 +482,7 @@ dc_rights
 } else {
 this.outboundlinks.put(url, "anchor" + (noindex ? " noindex" : "") + (nofollow ? " nofollow" : ""));
 }
-u = url.toNormalform(true, false);
+u = url.toNormalform(true);
 final String name = entry.getValue().getProperty("name", "");
 if (u.startsWith("mailto:")) {
 this.emaillinks.put(u.substring(7), name);

@@ -552,7 +552,7 @@ dc_rights
 assert false;
 continue;
 }
-u = url.toNormalform(true, true);
+u = url.toNormalform(true);
 if (u.endsWith("/"))
 u = u.substring(0, u.length() - 1);
 pos = u.lastIndexOf('/');

@@ -603,7 +603,7 @@ dc_rights
 continue loop;
 }
 if (url == null) continue loop;
-u = url.toNormalform(true, true);
+u = url.toNormalform(true);
 if ((pos = u.toLowerCase().indexOf("http://", 7)) > 0) {
 i.remove();
 u = u.substring(pos);

@@ -97,12 +97,12 @@ public interface Parser {
 }

 public Failure(final String message, final MultiProtocolURI url) {
-super(message + "; url = " + url.toNormalform(true, false));
+super(message + "; url = " + url.toNormalform(true));
 this.url = url;
 }

 public Failure(final String message, final MultiProtocolURI url, Throwable e) {
-super(message + "; url = " + url.toNormalform(true, false), e);
+super(message + "; url = " + url.toNormalform(true), e);
 this.url = url;
 }

@@ -198,7 +198,7 @@ public final class TextParser {
 AbstractParser.log.logWarning(errorMsg);
 throw new Parser.Failure(errorMsg, location);
 }
-assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true, false);
+assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);

 Document[] docs = parseSource(location, mimeType, idioms, charset, content);

@@ -222,7 +222,7 @@ public final class TextParser {
 AbstractParser.log.logWarning(errorMsg);
 throw new Parser.Failure(errorMsg, location);
 }
-assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true, false);
+assert !idioms.isEmpty() : "no parsers applied for url " + location.toNormalform(true);

 // if we do not have more than one parser or the content size is over MaxInt
 // then we use only one stream-oriented parser.

@@ -315,7 +315,7 @@ public final class TextParser {
 }
 String failedParsers = "";
 for (final Map.Entry<Parser, Parser.Failure> error: failedParser.entrySet()) {
-AbstractParser.log.logWarning("tried parser '" + error.getKey().getName() + "' to parse " + location.toNormalform(true, false) + " but failed: " + error.getValue().getMessage(), error.getValue());
+AbstractParser.log.logWarning("tried parser '" + error.getKey().getName() + "' to parse " + location.toNormalform(true) + " but failed: " + error.getValue().getMessage(), error.getValue());
 failedParsers += error.getKey().getName() + " ";
 }
 throw new Parser.Failure("All parser failed: " + failedParsers, location);

@@ -68,7 +68,7 @@ public class DCEntry extends TreeMap<String, String> {
 double lon
 ) {
 super((Collator) insensitiveCollator.clone());
-this.put("dc:identifier", url.toNormalform(true, false));
+this.put("dc:identifier", url.toNormalform(true));
 this.put("dc:date", ISO8601Formatter.FORMATTER.format(date));
 this.put("dc:title", title);
 this.put("dc:creator", author);

@@ -116,7 +116,7 @@ public class OAIPMHImporter extends Thread implements Importer, Comparable<OAIPM

 @Override
 public String source() {
-return this.source.toNormalform(true, false);
+return this.source.toNormalform(true);
 }

 @Override

@@ -49,7 +49,7 @@ public class OAIPMHLoader {
 this.source = source;

 // load the file from the net
-Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true, false));
+Log.logInfo("OAIPMHLoader", "loading record from " + source.toNormalform(true));
 Response response = null;
 IOException ee = null;
 for (int i = 0; i < 5; i++) {

@@ -58,7 +58,7 @@ public class OAIPMHLoader {
 response = loader.load(loader.request(source, false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, TextSnippet.snippetMinLoadDelay);
 break;
 } catch (IOException e) {
-Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true, false));
+Log.logWarning("OAIPMHLoader", "loading failed at attempt " + (i + 1) + ": " + source.toNormalform(true));
 ee = e;
 continue;
 }

@@ -80,7 +80,7 @@ public class OAIPMHLoader {
 }

 public String source() {
-return this.source.toNormalform(true, false);
+return this.source.toNormalform(true);
 }

 public static StringBuilder escape(final String s) {

@@ -106,7 +106,7 @@ public class ResumptionToken extends TreeMap<String, String> {
 * @return a string containing the url up to and including the '?'
 */
 public static String truncatedURL(final DigestURI url) {
-String u = url.toNormalform(true, true);
+String u = url.toNormalform(true);
 final int i = u.indexOf('?');
 if (i > 0) u = u.substring(0, i + 1);
 return u;

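The javadoc above states the contract of truncatedURL: everything up to and including the '?' is kept. A plain-string illustration of the same two lines:

    // truncatedURL keeps the URL up to and including the '?':
    String u = "http://example.org/oai?verb=ListRecords&metadataPrefix=oai_dc"; // example input
    final int i = u.indexOf('?');
    if (i > 0) u = u.substring(0, i + 1);
    // u == "http://example.org/oai?"
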
@@ -86,7 +86,7 @@ public class AugmentParser extends AbstractParser implements Parser {
 it = Switchboard.getSwitchboard().tables.orderBy(it, -1, "timestamp_creation").iterator();
 while (it.hasNext()) {
 net.yacy.kelondro.blob.Tables.Row r = it.next();
-if (r.get("url", "").equals (url.toNormalform(false, false))) {
+if (r.get("url", "").equals (url.toNormalform(false))) {
 Set<String> tags = new HashSet<String>();
 for (String s : YMarkUtil.keysStringToSet(r.get("scitag", ""))) {
 tags.add(s);

@@ -185,7 +185,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 this.htmlFilterEventListeners = new EventListenerList();
 this.lon = 0.0d;
 this.lat = 0.0d;
-this.evaluationScores.match(Element.url, root.toNormalform(false, false));
+this.evaluationScores.match(Element.url, root.toNormalform(true));
 this.canonical = null;
 this.breadcrumbs = 0;
 }

@@ -348,10 +348,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 } catch (final MalformedURLException e) {}
 } else if (tagname.equalsIgnoreCase("frame")) {
 final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
-tagopts.put("src", src.toNormalform(true, false));
+tagopts.put("src", src.toNormalform(true));
 mergeAnchors(src, tagopts /* with property "name" */);
 this.frames.add(src);
-this.evaluationScores.match(Element.framepath, src.toNormalform(true, false));
+this.evaluationScores.match(Element.framepath, src.toNormalform(true));
 } else if (tagname.equalsIgnoreCase("body")) {
 final String c = tagopts.getProperty("class", EMPTY_STRING);
 this.evaluationScores.match(Element.bodyclass, c);

@@ -386,7 +386,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 if (href.length() > 0) {
 tagopts.put("nme", areatitle);
 MultiProtocolURI url = absolutePath(href);
-tagopts.put("href", url.toNormalform(true, false));
+tagopts.put("href", url.toNormalform(true));
 mergeAnchors(url, tagopts);
 }
 } else if (tagname.equalsIgnoreCase("link")) {

@@ -394,7 +394,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 final MultiProtocolURI newLink = absolutePath(href);

 if (newLink != null) {
-tagopts.put("href", newLink.toNormalform(true, false));
+tagopts.put("href", newLink.toNormalform(true));
 final String rel = tagopts.getProperty("rel", EMPTY_STRING);
 final String linktitle = tagopts.getProperty("title", EMPTY_STRING);
 final String type = tagopts.getProperty("type", EMPTY_STRING);

@@ -425,7 +425,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 if (url != null) {
 final int width = Integer.parseInt(tagopts.getProperty("width", "-1"));
 final int height = Integer.parseInt(tagopts.getProperty("height", "-1"));
-tagopts.put("src", url.toNormalform(true, false));
+tagopts.put("src", url.toNormalform(true));
 final EmbedEntry ie = new EmbedEntry(url, width, height, tagopts.getProperty("type", EMPTY_STRING), tagopts.getProperty("pluginspage", EMPTY_STRING));
 this.embeds.put(url, ie);
 mergeAnchors(url, tagopts);

@@ -436,15 +436,15 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 final String name = tagopts.getProperty("name", EMPTY_STRING);
 if (name.equalsIgnoreCase("movie")) {
 MultiProtocolURI url = absolutePath(tagopts.getProperty("value", EMPTY_STRING));
-tagopts.put("value", url.toNormalform(true, false));
+tagopts.put("value", url.toNormalform(true));
 mergeAnchors(url, tagopts /* with property "name" */);
 }
 } else if (tagname.equalsIgnoreCase("iframe")) {
 final MultiProtocolURI src = absolutePath(tagopts.getProperty("src", EMPTY_STRING));
-tagopts.put("src", src.toNormalform(true, false));
+tagopts.put("src", src.toNormalform(true));
 mergeAnchors(src, tagopts /* with property "name" */);
 this.iframes.add(src);
-this.evaluationScores.match(Element.iframepath, src.toNormalform(true, false));
+this.evaluationScores.match(Element.iframepath, src.toNormalform(true));
 } else if (tagname.equalsIgnoreCase("html")) {
 final String lang = tagopts.getProperty("lang", EMPTY_STRING);
 if (!lang.isEmpty()) // fake a language meta to preserv detection from <html lang="xx" />

@@ -471,7 +471,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
 addImage(this.images, ie);
 } else {
 tagopts.put("text", recursiveParse(text));
-tagopts.put("href", url.toNormalform(true, false)); // we must assign this because the url may have resolved backpaths and may not be absolute
+tagopts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
 mergeAnchors(url, tagopts);
 }
 }

@@ -58,7 +58,7 @@ public class EmbedEntry {

 @Override
 public String toString() {
-return "<embed url=\"" + this.url.toNormalform(false, false) + "\"" +
+return "<embed url=\"" + this.url.toNormalform(false) + "\"" +
 (this.type != null && this.type.length() > 0 ? " type=\"" + this.type + "\"" : "") +
 (this.pluginspage != null && this.pluginspage.length() > 0 ? " pluginspage=\"" + this.pluginspage + "\"" : "") +
 (this.width >= 0 ? " width=\"" + this.width + "\"" : "") +

@@ -66,7 +66,7 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry

 @Override
 public String toString() {
-return "<img url=\"" + this.url.toNormalform(false, false) + "\"" +
+return "<img url=\"" + this.url.toNormalform(false) + "\"" +
 (this.alt != null && this.alt.length() > 0 ? " alt=\"" + this.alt + "\"" : "") +
 (this.width >= 0 ? " width=\"" + this.width + "\"" : "") +
 (this.height >= 0 ? " height=\"" + this.height + "\"" : "") +

@@ -91,7 +91,7 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry
 // assuming that hashCode would return a 'perfect hash' this method would
 // create a total ordering on images with respect on the image size
 assert (this.url != null);
-if (this.url.toNormalform(true, true).equals((h).url.toNormalform(true, true))) return 0;
+if (this.url.toNormalform(true).equals((h).url.toNormalform(true))) return 0;
 final int thc = this.hashCode();
 final int ohc = (h).hashCode();
 if (thc < ohc) return -1;

@@ -111,7 +111,7 @@ public class sitemapParser extends AbstractParser implements Parser {

 public static SitemapReader parse(final DigestURI sitemapURL) throws IOException {
 // download document
-Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true, false));
+Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
 final RequestHeader requestHeader = new RequestHeader();
 requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
 final HTTPClient client = new HTTPClient();

@@ -138,8 +138,8 @@ public class AugmentHtmlStream {
 d.head().append ("<script type='text/javascript'>"+loadInternal("interaction_elements/interaction_metadata.js", requestHeader)+"</script>");

-d.body().append (loadInternal("interaction_elements/OverlayInteraction.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader));
-d.body().append (loadInternal("interaction_elements/Footer.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false, true), requestHeader));
+d.body().append (loadInternal("interaction_elements/OverlayInteraction.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false), requestHeader));
+d.body().append (loadInternal("interaction_elements/Footer.html?action="+action+"&urlhash="+ ASCII.String(url.hash()) +"&url="+url.toNormalform(false), requestHeader));

 }

@@ -242,7 +242,7 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
 final StringBuilder hashs = new StringBuilder(12);
 assert hashs.length() == 0;
 // form the 'local' part of the hash
-final String normalform = toNormalform(true, true, true);
+final String normalform = toNormalform(true, true);
 final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform));
 if (b64l.length() < 5) return null;
 hashs.append(b64l.substring(0, 5)); // 5 chars

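The DigestURI hunk above derives the 'local' part of a URL hash by MD5-hashing the normal form, Base64-encoding the digest, and keeping the first 5 characters. A self-contained sketch of the same scheme using the JDK's MessageDigest and Base64 in place of YaCy's Digest and Base64Order (YaCy's encoder uses a different alphabet and ordering, so real YaCy hashes differ):

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import java.util.Base64;

    // Sketch of a 5-character hash prefix over a normalized URL; JDK classes
    // stand in for YaCy's Digest/Base64Order, so outputs are not comparable.
    public final class UrlHashSketch {
        public static String localPart(final String normalform) throws NoSuchAlgorithmException {
            final byte[] raw = MessageDigest.getInstance("MD5")
                    .digest(normalform.getBytes(StandardCharsets.UTF_8));
            final String b64 = Base64.getEncoder().withoutPadding().encodeToString(raw);
            return b64.length() < 5 ? null : b64.substring(0, 5); // 5 chars, as in the hunk
        }
    }
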
@@ -346,7 +346,7 @@ public class URIMetadataNode implements URIMetadata {

 try {
 s.append("hash=").append(ASCII.String(md.hash()));
-s.append(",url=").append(crypt.simpleEncode(md.url().toNormalform(false, true)));
+s.append(",url=").append(crypt.simpleEncode(md.url().toNormalform(true)));
 s.append(",descr=").append(crypt.simpleEncode(md.dc_title()));
 s.append(",author=").append(crypt.simpleEncode(md.dc_creator()));
 s.append(",tags=").append(crypt.simpleEncode(Tagging.cleanTagFromAutotagging(md.dc_subject())));

@@ -197,7 +197,7 @@ public class URIMetadataRow implements URIMetadata {
 final double lat,
 final double lon) {
 final CharBuffer s = new CharBuffer(3600, 360);
-s.append(url.toNormalform(false, true)).appendLF();
+s.append(url.toNormalform(true)).appendLF();
 s.append(dc_title).appendLF();
 if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
 s.appendLF();

@@ -585,7 +585,7 @@ public class URIMetadataRow implements URIMetadata {
 }
 public boolean matches(final Pattern matcher) {
 if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
-if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
+if (this.url != null) return matcher.matcher(this.url.toNormalform(true).toLowerCase()).matches();
 return false;
 }
 public DigestURI url() {

@@ -81,7 +81,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
 this.lother = md.lother();
 this.positions = new LinkedBlockingQueue<Integer>();
 this.positions.add(1);
-String urlNormalform = md.url().toNormalform(true, false);
+String urlNormalform = md.url().toNormalform(true);
 this.urlcomps = MultiProtocolURI.urlComps(urlNormalform).length;
 this.urllength = urlNormalform.length();
 this.virtualAge = -1; // compute that later

@@ -209,7 +209,7 @@ public final class LoaderDispatcher {
 requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
 DigestURI refererURL = null;
 if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
-if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
+if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
 final Response response = new Response(
 request,
 requestHeader,

@@ -223,7 +223,7 @@ public final class LoaderDispatcher {
 // well, just take the cache and don't care about freshness of the content
 final byte[] content = Cache.getContent(url.hash());
 if (content != null) {
-this.log.logInfo("cache hit/useall for: " + url.toNormalform(true, false));
+this.log.logInfo("cache hit/useall for: " + url.toNormalform(true));
 response.setContent(content);
 return response;
 }

@@ -234,14 +234,14 @@ public final class LoaderDispatcher {
 if (response.isFreshForProxy()) {
 final byte[] content = Cache.getContent(url.hash());
 if (content != null) {
-this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true, false));
+this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true));
 response.setContent(content);
 return response;
 }
 }
-this.log.logInfo("cache hit/stale for: " + url.toNormalform(true, false));
+this.log.logInfo("cache hit/stale for: " + url.toNormalform(true));
 } else if (cachedResponse != null) {
-this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true, false));
+this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
 }
 }

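Taken together, the two LoaderDispatcher hunks above implement a three-way cache decision: serve any cached content regardless of age ("hit/useall"), serve cached content only while it is still fresh for proxy use ("hit/fresh"), or log a stale hit and fall through to a network load. A condensed sketch of that branch structure; the enum values and helper signature here are assumptions for illustration, not the YaCy API:

    // Condensed sketch of the cache decision; names are assumed.
    enum Strategy { NOCACHE, IFFRESH, IFEXIST, CACHEONLY }

    final class CacheDecisionSketch {
        /** returns cached bytes to serve, or null to load from the network */
        static byte[] decide(final Strategy s, final byte[] cached, final boolean freshForProxy) {
            if (cached == null || s == Strategy.NOCACHE) return null;
            if (s == Strategy.IFEXIST || s == Strategy.CACHEONLY) return cached; // "hit/useall"
            if (s == Strategy.IFFRESH && freshForProxy) return cached;           // "hit/fresh"
            return null;                                                         // "hit/stale"
        }
    }
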
@@ -2388,7 +2388,7 @@ public final class Switchboard extends serverSwitch
 (
 response.profile() == null ||
 response.depth() < response.profile().depth() ||
-response.profile().crawlerNoDepthLimitMatchPattern().matcher(response.url().toNormalform(false, false)).matches()
+response.profile().crawlerNoDepthLimitMatchPattern().matcher(response.url().toNormalform(true)).matches()
 )
 ) {
 // get the hyperlinks

@@ -2410,7 +2410,7 @@ public final class Switchboard extends serverSwitch

 // process the next hyperlink
 nextUrl = nextEntry.getKey();
-String u = nextUrl.toNormalform(true, true, true);
+String u = nextUrl.toNormalform(true, true);
 if ( !(u.startsWith("http://")
 || u.startsWith("https://")
 || u.startsWith("ftp://")

@@ -2447,7 +2447,7 @@ public final class Switchboard extends serverSwitch
 this.log.logInfo("CRAWL: ADDED "
 + hl.size()
 + " LINKS FROM "
-+ response.url().toNormalform(false, true)
++ response.url().toNormalform(true)
 + ", STACKING TIME = "
 + (stackEndTime - stackStartTime)
 + ", PARSING TIME = "

@@ -2460,7 +2460,7 @@ public final class Switchboard extends serverSwitch
 public IndexingQueueEntry condenseDocument(final IndexingQueueEntry in) {
 in.queueEntry.updateStatus(Response.QUEUE_STATE_CONDENSING);
 CrawlProfile profile = in.queueEntry.profile();
-String urls = in.queueEntry.url().toNormalform(false, true);
+String urls = in.queueEntry.url().toNormalform(true);

 // check profile attributes which prevent indexing (while crawling is allowed)
 if (!profile.indexText() && !profile.indexMedia()) {

@@ -2616,7 +2616,7 @@ public final class Switchboard extends serverSwitch
 for ( final Map.Entry<MultiProtocolURI, String> rssEntry : document.getRSS().entrySet() ) {
 final Tables.Data rssRow = new Tables.Data();
 rssRow.put("referrer", url.hash());
-rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true, false)));
+rssRow.put("url", UTF8.getBytes(rssEntry.getKey().toNormalform(true)));
 rssRow.put("title", UTF8.getBytes(rssEntry.getValue()));
 rssRow.put("recording_date", new Date());
 try {

@@ -2643,7 +2643,7 @@ public final class Switchboard extends serverSwitch
 EventTracker.update(EventTracker.EClass.PPM, Long.valueOf(currentPPM()), true);
 lastPPMUpdate = System.currentTimeMillis();
 }
-EventTracker.update(EventTracker.EClass.INDEX, url.toNormalform(true, false), false);
+EventTracker.update(EventTracker.EClass.INDEX, url.toNormalform(true), false);

 // if this was performed for a remote crawl request, notify requester
 if ( (processCase == EventOrigin.GLOBAL_CRAWLING) && (queueEntry.initiator() != null) ) {

@@ -2719,7 +2719,7 @@ public final class Switchboard extends serverSwitch
 final Request request = this.loader.request(url, true, true);
 final CrawlProfile profile = sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
 final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
-final String urls = url.toNormalform(false, false);
+final String urls = url.toNormalform(true);
 if ( acceptedError != null ) {
 this.log.logWarning("addToIndex: cannot load "
 + urls

@@ -2759,18 +2759,18 @@ public final class Switchboard extends serverSwitch
 searchEvent,
 "heuristic:" + heuristicName);
 Switchboard.this.log.logInfo("addToIndex fill of url "
-+ url.toNormalform(true, true)
++ url.toNormalform(true)
 + " finished");
 }
 }
 } catch ( final IOException e ) {
 Switchboard.this.log.logWarning("addToIndex: failed loading "
-+ url.toNormalform(false, false)
++ url.toNormalform(true)
 + ": "
 + e.getMessage());
 } catch ( final Parser.Failure e ) {
 Switchboard.this.log.logWarning("addToIndex: failed parsing "
-+ url.toNormalform(false, false)
++ url.toNormalform(true)
 + ": "
 + e.getMessage());
 }

@@ -2796,7 +2796,7 @@ public final class Switchboard extends serverSwitch
 final String acceptedError = this.crawlStacker.checkAcceptance(url, profile, 0);
 if (acceptedError != null) {
 this.log.logInfo("addToCrawler: cannot load "
-+ url.toNormalform(false, false)
++ url.toNormalform(true)
 + ": "
 + acceptedError);
 return;

@@ -2810,7 +2810,7 @@ public final class Switchboard extends serverSwitch

 if (s != null) {
 Switchboard.this.log.logInfo("addToCrawler: failed to add "
-+ url.toNormalform(false, false)
++ url.toNormalform(true)
 + ": "
 + s);
 }

@@ -2840,7 +2840,7 @@ public final class Switchboard extends serverSwitch
 "");
 if ( response == null ) {
 Switchboard.this.log.logInfo("Sending crawl receipt for '"
-+ this.reference.url().toNormalform(false, true)
++ this.reference.url().toNormalform(true)
 + "' to "
 + this.initiatorPeer.getName()
 + " FAILED, send time = "

@@ -2849,7 +2849,7 @@ public final class Switchboard extends serverSwitch
 }
 final String delay = response.get("delay");
 Switchboard.this.log.logInfo("Sending crawl receipt for '"
-+ this.reference.url().toNormalform(false, true)
++ this.reference.url().toNormalform(true)
 + "' to "
 + this.initiatorPeer.getName()
 + " success, delay = "

@@ -557,7 +557,7 @@ public final class Fulltext implements Iterable<byte[]> {
 while (i.hasNext()) {
 entry = i.next();
 if (this.set != null && !this.set.has(entry.hash())) continue;
-url = entry.url().toNormalform(true, false);
+url = entry.url().toNormalform(true);
 if (!url.matches(this.filter)) continue;
 if (this.format == 0) {
 pw.println(url);

@@ -218,7 +218,7 @@ public class Segment {
 String hh = DigestURI.hosthash(host);
 final BlockingQueue<String> hostQueue = this.fulltext.getSolr().concurrentIDs(YaCySchema.host_id_s + ":" + hh, 0, Integer.MAX_VALUE, 10000);

-final String urlstub = stub.toNormalform(false, false);
+final String urlstub = stub.toNormalform(true);

 // now filter the stub from the iterated urls
 return new LookAheadIterator<DigestURI>() {

@@ -234,7 +234,7 @@ public class Segment {
 }
 if (id == null || id == AbstractSolrConnector.POISON_ID) return null;
 DigestURI u = Segment.this.fulltext.getMetadata(ASCII.getBytes(id)).url();
-if (u.toNormalform(true, false).startsWith(urlstub)) return u;
+if (u.toNormalform(true).startsWith(urlstub)) return u;
 }
 }
 };

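The iterator above filters Solr-provided document IDs down to those whose normalized URL starts with the requested stub. The same filtering, reduced to a minimal sketch over plain strings:

    import java.util.List;
    import java.util.stream.Collectors;

    // Minimal stand-in for the stub filter above: keep URLs whose normal
    // form starts with the requested prefix.
    final class StubFilterSketch {
        static List<String> filter(final List<String> normalizedUrls, final String urlstub) {
            return normalizedUrls.stream()
                    .filter(u -> u.startsWith(urlstub))
                    .collect(Collectors.toList());
        }
    }
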
@@ -361,7 +361,7 @@ public class Segment {
 // load some document metadata
 final String id = ASCII.String(url.hash());
 final String dc_title = document.dc_title();
-final String urlNormalform = url.toNormalform(true, false);
+final String urlNormalform = url.toNormalform(true);
 final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language

 // STORE URL TO LOADED-URL-DB

@@ -218,7 +218,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable

 if (allAttr || contains(YaCySchema.failreason_t)) add(doc, YaCySchema.failreason_t, "");
 add(doc, YaCySchema.id, ASCII.String(md.hash()));
-String us = digestURI.toNormalform(true, false);
+String us = digestURI.toNormalform(true);
 add(doc, YaCySchema.sku, us);
 if (allAttr || contains(YaCySchema.ip_s)) {
 final InetAddress address = digestURI.getInetAddress();

@@ -345,7 +345,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 boolean allAttr = this.isEmpty();
 add(doc, YaCySchema.id, id);
 if (allAttr || contains(YaCySchema.failreason_t)) add(doc, YaCySchema.failreason_t, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
-String us = digestURI.toNormalform(true, false);
+String us = digestURI.toNormalform(true);
 add(doc, YaCySchema.sku, us);
 if (allAttr || contains(YaCySchema.ip_s)) {
 final InetAddress address = digestURI.getInetAddress();

@@ -562,7 +562,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 final String[] css_url = new String[csss.size()];
 c = 0;
 for (final Map.Entry<MultiProtocolURI, String> entry: csss.entrySet()) {
-final String url = entry.getKey().toNormalform(false, false);
+final String url = entry.getKey().toNormalform(false);
 inboundLinks.remove(url);
 outboundLinks.remove(url);
 css_tag[c] =

@@ -584,7 +584,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 for (final MultiProtocolURI url: scriptss) {
 inboundLinks.remove(url);
 outboundLinks.remove(url);
-scripts[c++] = url.toNormalform(false, false);
+scripts[c++] = url.toNormalform(false);
 }
 add(doc, YaCySchema.scriptscount_i, scripts.length);
 if (scripts.length > 0) add(doc, YaCySchema.scripts_txt, scripts);

@@ -598,7 +598,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 for (final MultiProtocolURI url: framess) {
 inboundLinks.remove(url);
 outboundLinks.remove(url);
-frames[c++] = url.toNormalform(false, false);
+frames[c++] = url.toNormalform(false);
 }
 add(doc, YaCySchema.framesscount_i, frames.length);
 if (frames.length > 0) add(doc, YaCySchema.frames_txt, frames);

@@ -612,7 +612,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 for (final MultiProtocolURI url: iframess) {
 inboundLinks.remove(url);
 outboundLinks.remove(url);
-iframes[c++] = url.toNormalform(false, false);
+iframes[c++] = url.toNormalform(false);
 }
 add(doc, YaCySchema.iframesscount_i, iframes.length);
 if (iframes.length > 0) add(doc, YaCySchema.iframes_txt, iframes);

@@ -624,7 +624,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 if (canonical != null) {
 inboundLinks.remove(canonical);
 outboundLinks.remove(canonical);
-add(doc, YaCySchema.canonical_t, canonical.toNormalform(false, false));
+add(doc, YaCySchema.canonical_t, canonical.toNormalform(false));
 }
 }

@@ -638,7 +638,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 if (refreshURL != null) {
 inboundLinks.remove(refreshURL);
 outboundLinks.remove(refreshURL);
-add(doc, YaCySchema.refresh_s, refreshURL.toNormalform(false, false));
+add(doc, YaCySchema.refresh_s, refreshURL.toNormalform(false));
 }
 } catch (MalformedURLException e) {
 add(doc, YaCySchema.refresh_s, refresh);

@@ -692,7 +692,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 final String name = p.getProperty("name", ""); // the name attribute
 final String rel = p.getProperty("rel", ""); // the rel-attribute
 final String text = p.getProperty("text", ""); // the text between the <a></a> tag
-final String urls = url.toNormalform(false, false);
+final String urls = url.toNormalform(false);
 final int pr = urls.indexOf("://",0);
 inboundlinksURLProtocol.add(urls.substring(0, pr));
 inboundlinksURLStub.add(urls.substring(pr + 3));

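The hunk above splits each inbound link into protocol and stub on the first "://". A short illustration of the two substring calls:

    // Splitting a normalized URL at "://", as in the hunk above:
    String urls = "http://example.org/path?x=1";        // example input
    final int pr = urls.indexOf("://", 0);
    String protocol = urls.substring(0, pr);             // "http"
    String stub = urls.substring(pr + 3);                // "example.org/path?x=1"
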
@@ -702,7 +702,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 inboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
 inboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
 inboundlinksTag.add(
-"<a href=\"" + url.toNormalform(false, false) + "\"" +
+"<a href=\"" + url.toNormalform(false) + "\"" +
 (rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +
 (name.length() > 0 ? " name=\"" + name + "\"" : "") +
 ">" +

@@ -740,7 +740,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 final String name = p.getProperty("name", ""); // the name attribute
 final String rel = p.getProperty("rel", ""); // the rel-attribute
 final String text = p.getProperty("text", ""); // the text between the <a></a> tag
-final String urls = url.toNormalform(false, false);
+final String urls = url.toNormalform(false);
 final int pr = urls.indexOf("://",0);
 outboundlinksURLProtocol.add(urls.substring(0, pr));
 outboundlinksURLStub.add(urls.substring(pr + 3));

@@ -750,7 +750,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 outboundlinksTextChars.add(text.length() > 0 ? text.length() : 0);
 outboundlinksTextWords.add(text.length() > 0 ? text.split(" ").length : 0);
 outboundlinksTag.add(
-"<a href=\"" + url.toNormalform(false, false) + "\"" +
+"<a href=\"" + url.toNormalform(false) + "\"" +
 (rel.length() > 0 ? " rel=\"" + rel + "\"" : "") +
 (name.length() > 0 ? " name=\"" + name + "\"" : "") +
 ">" +

@@ -898,7 +898,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
 public SolrInputDocument err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException {
 final SolrInputDocument solrdoc = new SolrInputDocument();
 add(solrdoc, YaCySchema.id, ASCII.String(digestURI.hash()));
-add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true, false));
+add(solrdoc, YaCySchema.sku, digestURI.toNormalform(true));
 final InetAddress address = digestURI.getInetAddress();
 if (contains(YaCySchema.ip_s) && address != null) add(solrdoc, YaCySchema.ip_s, address.getHostAddress());
 if (contains(YaCySchema.host_s) && digestURI.getHost() != null) add(solrdoc, YaCySchema.host_s, digestURI.getHost());

@@ -703,7 +703,7 @@ public final class RWIProcess extends Thread
 }
 }

-final String pageurl = page.url().toNormalform(true, true);
+final String pageurl = page.url().toNormalform(true);
 final String pageauthor = page.dc_creator();
 final String pagetitle = page.dc_title().toLowerCase();

@@ -309,7 +309,7 @@ public class SnippetProcess {
 r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;

 // prefer hit with 'prefer' pattern
-if (this.query.prefer.matcher(rentry.url().toNormalform(true, true)).matches()) {
+if (this.query.prefer.matcher(rentry.url().toNormalform(true)).matches()) {
 r += 256 << this.query.ranking.coeff_prefer;
 }
 if (this.query.prefer.matcher(rentry.title()).matches()) {

@@ -317,7 +317,7 @@ public class SnippetProcess {
 }

 // apply 'common-sense' heuristic using references
-final String urlstring = rentry.url().toNormalform(true, true);
+final String urlstring = rentry.url().toNormalform(true);
 final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
 final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
 int tc;

@@ -491,7 +491,7 @@ public class SnippetProcess {
 break; // no more available
 }

-this.setName(page.url().toNormalform(true, false)); // to support debugging
+this.setName(page.url().toNormalform(true)); // to support debugging
 if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) {
 continue;
 }

@@ -618,7 +618,7 @@ public class SnippetProcess {
 if (this.deleteIfSnippetFail) {
 this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.query_include_hashes, reason);
 }
-log.logInfo("sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason);
+log.logInfo("sorted out url " + page.url().toNormalform(true) + " during search: " + reason);
 return null;
 }
 }

@@ -180,7 +180,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
 url = new DigestURI(entry.getKey());
 desc = entry.getValue();
 if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
-final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
+final int ranking = removeAppearanceHashes(url.toNormalform(true), queryhashes).size() +
 removeAppearanceHashes(desc, queryhashes).size();
 if (ranking < 2 * queryhashes.size()) {
 result.add(new MediaSnippet(mediatype, url, Classification.url2mime(url), desc, document.getTextLength(), null, ranking, source));

@@ -210,7 +210,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
 if (ientry.width() > 0 && ientry.width() < 32) continue;
 desc = ientry.alt();
 final int appcount = queryhashes.size() * 2 -
-removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
+removeAppearanceHashes(url.toNormalform(true), queryhashes).size() -
 removeAppearanceHashes(desc, queryhashes).size();
 final long ranking = Long.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
 result.add(new MediaSnippet(ContentDomain.IMAGE, url, Classification.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));

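The image ranking above rewards large images whose URL and alt text match many query terms: the product (height + 1) * (width + 1) * (appcount + 1) is subtracted from Long.MAX_VALUE, so a bigger, better-matching image yields a smaller value and sorts first in ascending order. A worked comparison with assumed dimensions:

    // Worked example of the ranking formula above (dimensions assumed).
    static long rank(final long height, final long width, final long appcount) {
        return Long.MAX_VALUE - (height + 1) * (width + 1) * (appcount + 1);
    }
    // rank(600, 800, 3) < rank(32, 32, 3) < rank(32, 32, 0):
    // the 800x600 image with three query-term matches sorts first.
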
@@ -131,10 +131,10 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
 return this.urlentry.flags();
 }
 public String urlstring() {
-return (this.alternative_urlstring == null) ? this.urlentry.url().toNormalform(false, true) : this.alternative_urlstring;
+return (this.alternative_urlstring == null) ? this.urlentry.url().toNormalform(true) : this.alternative_urlstring;
 }
 public String urlname() {
-return (this.alternative_urlname == null) ? MultiProtocolURI.unescape(this.urlentry.url().toNormalform(false, true)) : this.alternative_urlname;
+return (this.alternative_urlname == null) ? MultiProtocolURI.unescape(this.urlentry.url().toNormalform(true)) : this.alternative_urlname;
 }
 public String title() {
 return this.urlentry.dc_title();

@@ -27,7 +27,7 @@ public class AugmentedHtmlStream extends FilterOutputStream {
 this.buffer = new ByteArrayOutputStream();
 this.charset = charset;
 this.url = url;
-this.urls = this.url.toNormalform(false, true);
+this.urls = this.url.toNormalform(false);
 this.requestHeader = requestHeader;
 }

@@ -315,7 +315,7 @@ public final class HTTPDProxyHandler {
 //redirector
 if (redirectorEnabled){
 synchronized(redirectorProcess){
-redirectorWriter.println(url.toNormalform(false, true));
+redirectorWriter.println(url.toNormalform(true));
 redirectorWriter.flush();
 }
 final String newUrl = redirectorReader.readLine();