mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) Bugfix for Entries with null url in GlobalQueue
See: http://www.yacy-forum.de/viewtopic.php?p=12675#12675

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1069 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
22da652d4f
commit
444a5a9368
|
@@ -100,10 +100,10 @@ public class IndexCreateWWWGlobalQueue_p {
|
||||||
yacySeed initiator;
|
yacySeed initiator;
|
||||||
String profileHandle;
|
String profileHandle;
|
||||||
plasmaCrawlProfile.entry profileEntry;
|
plasmaCrawlProfile.entry profileEntry;
|
||||||
int i;
|
int i, showNum = 0;
|
||||||
for (i = 0; i < crawlerList.length; i++) {
|
for (i = 0; i < crawlerList.length; i++) {
|
||||||
urle = crawlerList[i];
|
urle = crawlerList[i];
|
||||||
if (urle != null) {
|
if ((urle != null)&&(urle.url()!=null)) {
|
||||||
initiator = yacyCore.seedDB.getConnected(urle.initiator());
|
initiator = yacyCore.seedDB.getConnected(urle.initiator());
|
||||||
profileHandle = urle.profileHandle();
|
profileHandle = urle.profileHandle();
|
||||||
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
|
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
|
||||||
|
@@ -115,9 +115,10 @@ public class IndexCreateWWWGlobalQueue_p {
|
||||||
prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name()));
|
prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name()));
|
||||||
prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString()));
|
prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString()));
|
||||||
dark = !dark;
|
dark = !dark;
|
||||||
|
showNum++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prop.put("crawler-queue_list", i);
|
prop.put("crawler-queue_list", showNum);
|
||||||
}
|
}
|
||||||
|
|
||||||
// return rewrite properties
|
// return rewrite properties
|
||||||
|
|
|
@@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||||
import de.anomic.htmlFilter.htmlFilterOutputStream;
|
import de.anomic.htmlFilter.htmlFilterOutputStream;
|
||||||
import de.anomic.http.httpHeader;
|
import de.anomic.http.httpHeader;
|
||||||
import de.anomic.plasma.plasmaCrawlProfile;
|
import de.anomic.plasma.plasmaCrawlProfile;
|
||||||
import de.anomic.plasma.plasmaParser;
|
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
import de.anomic.plasma.plasmaURL;
|
import de.anomic.plasma.plasmaURL;
|
||||||
import de.anomic.server.serverFileUtils;
|
import de.anomic.server.serverFileUtils;
|
||||||
|
|
|
@@ -430,25 +430,25 @@ public final class plasmaCrawlLURL extends plasmaURL {
|
||||||
// if the url cannot be found, this returns null
|
// if the url cannot be found, this returns null
|
||||||
this.urlHash = urlHash;
|
this.urlHash = urlHash;
|
||||||
try {
|
try {
|
||||||
byte[][] entry = urlHashCache.get(urlHash.getBytes());
|
byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
this.url = new URL(new String(entry[1]).trim());
|
this.url = new URL(new String(entry[1]).trim());
|
||||||
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
|
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
|
||||||
this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[3])));
|
this.moddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[3])));
|
||||||
this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[4])));
|
this.loaddate = new Date(86400000 * serverCodings.enhancedCoder.decodeBase64Long(new String(entry[4])));
|
||||||
this.referrerHash = (entry[5]==null)?dummyHash:new String(entry[5]);
|
this.referrerHash = (entry[5]==null)?dummyHash:new String(entry[5]);
|
||||||
this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6]));
|
this.copyCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[6]));
|
||||||
this.flags = new String(entry[7]);
|
this.flags = new String(entry[7]);
|
||||||
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
|
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
|
||||||
this.language = new String(entry[9]);
|
this.language = new String(entry[9]);
|
||||||
this.doctype = (char) entry[10][0];
|
this.doctype = (char) entry[10][0];
|
||||||
this.size = (long) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
|
this.size = serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
|
||||||
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
|
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
|
||||||
this.snippet = null;
|
this.snippet = null;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString(), e);
|
serverLog.logSevere("PLASMA", "INTERNAL ERROR in plasmaLURL.entry/1: " + e.toString(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user