mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) Bugfix for entries with a null URL in the GlobalQueue
See: http://www.yacy-forum.de/viewtopic.php?p=12675#12675
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1069 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
22da652d4f
commit
444a5a9368
|
@ -100,10 +100,10 @@ public class IndexCreateWWWGlobalQueue_p {
|
|||
yacySeed initiator;
|
||||
String profileHandle;
|
||||
plasmaCrawlProfile.entry profileEntry;
|
||||
int i;
|
||||
int i, showNum = 0;
|
||||
for (i = 0; i < crawlerList.length; i++) {
|
||||
urle = crawlerList[i];
|
||||
if (urle != null) {
|
||||
if ((urle != null)&&(urle.url()!=null)) {
|
||||
initiator = yacyCore.seedDB.getConnected(urle.initiator());
|
||||
profileHandle = urle.profileHandle();
|
||||
profileEntry = (profileHandle == null) ? null : switchboard.profiles.getEntry(profileHandle);
|
||||
|
@ -115,9 +115,10 @@ public class IndexCreateWWWGlobalQueue_p {
|
|||
prop.put("crawler-queue_list_"+i+"_anchor", wikiCode.replaceHTML(urle.name()));
|
||||
prop.put("crawler-queue_list_"+i+"_url", wikiCode.replaceHTML(urle.url().toString()));
|
||||
dark = !dark;
|
||||
showNum++;
|
||||
}
|
||||
}
|
||||
prop.put("crawler-queue_list", i);
|
||||
prop.put("crawler-queue_list", showNum);
|
||||
}
|
||||
|
||||
// return rewrite properties
|
||||
|
|
|
@ -61,7 +61,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
|
|||
import de.anomic.htmlFilter.htmlFilterOutputStream;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.plasma.plasmaCrawlProfile;
|
||||
import de.anomic.plasma.plasmaParser;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.plasma.plasmaURL;
|
||||
import de.anomic.server.serverFileUtils;
|
||||
|
|
|
@ -430,7 +430,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
|
|||
// if the url cannot be found, this returns null
|
||||
this.urlHash = urlHash;
|
||||
try {
|
||||
byte[][] entry = urlHashCache.get(urlHash.getBytes());
|
||||
byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
|
||||
if (entry != null) {
|
||||
this.url = new URL(new String(entry[1]).trim());
|
||||
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2]).trim();
|
||||
|
@ -442,7 +442,7 @@ public final class plasmaCrawlLURL extends plasmaURL {
|
|||
this.quality = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[8]));
|
||||
this.language = new String(entry[9]);
|
||||
this.doctype = (char) entry[10][0];
|
||||
this.size = (long) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
|
||||
this.size = serverCodings.enhancedCoder.decodeBase64Long(new String(entry[11]));
|
||||
this.wordCount = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(entry[12]));
|
||||
this.snippet = null;
|
||||
return;
|
||||
|
|
Loading…
Reference in New Issue
Block a user