mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
fix for problem with remote crawl referrers
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4210 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
875096552f
commit
bc2368e907
|
@ -185,7 +185,7 @@ public final class crawlOrder {
|
|||
if (!newURL.equals(urlv.get(0))) {
|
||||
env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0));
|
||||
}
|
||||
String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true);
|
||||
yacyURL refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null);
|
||||
if ((refURL != null) && (!refURL.equals(refv.get(0)))) {
|
||||
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ public final class crawlOrder {
|
|||
for (int i = 0; i < count; i++) {
|
||||
env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'");
|
||||
try {
|
||||
stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), (String) refv.get(i), iam, youare);
|
||||
stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), ((refv.get(i) == null) || (((String) refv.get(i)).length() == 0)) ? null : new yacyURL((String) refv.get(i), null), iam, youare);
|
||||
response = (String) stackresult[0];
|
||||
prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]);
|
||||
prop.put("list_" + i + "_lurl", (String) stackresult[2]);
|
||||
|
@ -244,7 +244,7 @@ public final class crawlOrder {
|
|||
return prop;
|
||||
}
|
||||
|
||||
private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, String referrer, String iam, String youare) {
|
||||
private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, yacyURL referrer, String iam, String youare) {
|
||||
String response, reason, lurl;
|
||||
// stack url
|
||||
switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
|
||||
|
|
|
@ -63,7 +63,7 @@ public class plasmaCrawlEntry {
|
|||
|
||||
private String initiator; // the initiator hash, is NULL or "" if it is the own proxy;
|
||||
// if this is generated by a crawl, the own peer hash is entered
|
||||
private String referrer; // the url's referrer hash
|
||||
private String refhash; // the url's referrer hash
|
||||
private yacyURL url; // the url as string
|
||||
private String name; // the name of the url, from anchor tag <a>name</a>
|
||||
private long appdate; // the time when the url was first time appeared
|
||||
|
@ -97,7 +97,7 @@ public class plasmaCrawlEntry {
|
|||
public plasmaCrawlEntry(
|
||||
String initiator,
|
||||
yacyURL url,
|
||||
String referrer,
|
||||
String referrerhash,
|
||||
String name,
|
||||
Date appdate,
|
||||
String profileHandle,
|
||||
|
@ -111,7 +111,7 @@ public class plasmaCrawlEntry {
|
|||
if ((initiator == null) || (initiator.length() == 0)) initiator = yacyURL.dummyHash;
|
||||
this.initiator = initiator;
|
||||
this.url = url;
|
||||
this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer;
|
||||
this.refhash = (referrerhash == null) ? yacyURL.dummyHash : referrerhash;
|
||||
this.name = (name == null) ? "" : name;
|
||||
this.appdate = (appdate == null) ? 0 : appdate.getTime();
|
||||
this.profileHandle = profileHandle; // must not be null
|
||||
|
@ -137,7 +137,7 @@ public class plasmaCrawlEntry {
|
|||
if (urlstring == null) throw new IOException ("url string is null");
|
||||
this.initiator = entry.getColString(1, null);
|
||||
this.url = new yacyURL(urlstring, entry.getColString(0, null));
|
||||
this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
|
||||
this.refhash = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
|
||||
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
|
||||
this.appdate = entry.getColLong(5);
|
||||
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
|
||||
|
@ -189,7 +189,7 @@ public class plasmaCrawlEntry {
|
|||
this.url.hash().getBytes(),
|
||||
(initiator == null) ? "".getBytes() : this.initiator.getBytes(),
|
||||
this.url.toString().getBytes(),
|
||||
this.referrer.getBytes(),
|
||||
this.refhash.getBytes(),
|
||||
namebytes,
|
||||
appdatestr,
|
||||
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
|
||||
|
@ -216,7 +216,7 @@ public class plasmaCrawlEntry {
|
|||
|
||||
public String referrerhash() {
|
||||
// the URL hash of the referrer URL
|
||||
return this.referrer;
|
||||
return this.refhash;
|
||||
}
|
||||
|
||||
public String initiator() {
|
||||
|
|
|
@ -343,7 +343,7 @@ public final class plasmaCrawlStacker extends Thread {
|
|||
return new plasmaCrawlEntry(entry);
|
||||
}
|
||||
|
||||
public String stackCrawl(yacyURL url, String referrerhash, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) {
|
||||
public String stackCrawl(yacyURL url, yacyURL referrer, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) {
|
||||
// stacks a crawl item. The position can also be remote
|
||||
// returns null if successful, a reason string if not successful
|
||||
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
|
||||
|
@ -352,7 +352,7 @@ public final class plasmaCrawlStacker extends Thread {
|
|||
plasmaCrawlEntry entry = new plasmaCrawlEntry(
|
||||
initiatorHash, // initiator, needed for p2p-feedback
|
||||
url, // url clear text string
|
||||
referrerhash, // last url in crawling queue
|
||||
(referrer == null) ? null : referrer.hash(), // last url in crawling queue
|
||||
name, // the anchor name
|
||||
loadDate, // load date
|
||||
(profile == null) ? null : profile.handle(), // profile must not be null!
|
||||
|
|
Loading…
Reference in New Issue
Block a user