mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@529 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
da81dcd66a
commit
c8a7a85ce2
|
@ -1047,13 +1047,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
|
||||
String reason = null; // failure reason
|
||||
|
||||
// strange error
|
||||
// strange errors
|
||||
if (nexturlString == null) {
|
||||
reason = "denied_(url_null)";
|
||||
log.logError("Wrong URL in stackCrawl: url=null");
|
||||
return reason;
|
||||
}
|
||||
|
||||
/*
|
||||
if (profile == null) {
|
||||
reason = "denied_(profile_null)";
|
||||
log.logError("Wrong Profile for stackCrawl: profile=null");
|
||||
return reason;
|
||||
}
|
||||
*/
|
||||
URL nexturl = null;
|
||||
if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash;
|
||||
String referrerHash = plasmaURL.urlHash(referrerString);
|
||||
|
@ -1066,7 +1072,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
}
|
||||
|
||||
// filter deny
|
||||
if ((currentdepth > 0) && (!(nexturlString.matches(profile.generalFilter())))) {
|
||||
if ((currentdepth > 0) && (profile != null) && (!(nexturlString.matches(profile.generalFilter())))) {
|
||||
reason = "denied_(does_not_match_filter)";
|
||||
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
|
||||
name, reason, new bitfield(plasmaURL.urlFlagLength), false);
|
||||
|
@ -1082,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
}
|
||||
|
||||
// deny post properties
|
||||
if ((plasmaHTCache.isPOST(nexturlString)) && (!(profile.crawlingQ()))) {
|
||||
if ((plasmaHTCache.isPOST(nexturlString)) && (profile != null) && (!(profile.crawlingQ()))) {
|
||||
reason = "denied_(post_url)";
|
||||
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
|
||||
name, reason, new bitfield(plasmaURL.urlFlagLength), false);
|
||||
|
@ -1102,6 +1108,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
// store information
|
||||
boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
|
||||
boolean global =
|
||||
(profile != null) &&
|
||||
(profile.remoteIndexing()) /* granted */ &&
|
||||
(currentdepth == profile.generalDepth()) /* leaf node */ &&
|
||||
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
|
||||
|
@ -1113,7 +1120,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
loadDate, /* load date */
|
||||
referrerHash, /* last url in crawling queue */
|
||||
name, /* the anchor name */
|
||||
profile.handle(),
|
||||
(profile == null) ? null : profile.handle(),
|
||||
currentdepth, /*depth so far*/
|
||||
0, /*anchors, default value */
|
||||
0, /*forkfactor, default value */
|
||||
|
|
|
@ -227,10 +227,10 @@ public class plasmaWordIndexDistribution {
|
|||
indexEntity.deleteComplete();
|
||||
} else if (indexEntity.size() <= count) {
|
||||
// take the whole entity
|
||||
// first check if we know all urls
|
||||
urlEnum = indexEntity.elements(true);
|
||||
unknownURLEntries = new HashSet();
|
||||
try {
|
||||
// first check if we know all urls
|
||||
urlEnum = indexEntity.elements(true);
|
||||
unknownURLEntries = new HashSet();
|
||||
while (urlEnum.hasMoreElements()) {
|
||||
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
|
||||
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
|
||||
|
@ -245,26 +245,26 @@ public class plasmaWordIndexDistribution {
|
|||
}
|
||||
}
|
||||
}
|
||||
// now delete all entries that have no url entry
|
||||
hashIter = unknownURLEntries.iterator();
|
||||
while (hashIter.hasNext()) {
|
||||
indexEntity.removeEntry((String) hashIter.next(), false);
|
||||
}
|
||||
// use what's remaining
|
||||
tmpEntities.add(indexEntity);
|
||||
log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash());
|
||||
count -= indexEntity.size();
|
||||
} catch (kelondroException e) {
|
||||
log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash());
|
||||
e.printStackTrace();
|
||||
try {indexEntity.deleteComplete();} catch (IOException ee) {}
|
||||
}
|
||||
// now delete all entries that have no url entry
|
||||
hashIter = unknownURLEntries.iterator();
|
||||
while (hashIter.hasNext()) {
|
||||
indexEntity.removeEntry((String) hashIter.next(), false);
|
||||
}
|
||||
// use what's remaining
|
||||
tmpEntities.add(indexEntity);
|
||||
log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash());
|
||||
count -= indexEntity.size();
|
||||
} else {
|
||||
// make an on-the-fly entity and insert values
|
||||
tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
|
||||
urlEnum = indexEntity.elements(true);
|
||||
unknownURLEntries = new HashSet();
|
||||
try {
|
||||
urlEnum = indexEntity.elements(true);
|
||||
unknownURLEntries = new HashSet();
|
||||
while ((urlEnum.hasMoreElements()) && (count > 0)) {
|
||||
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
|
||||
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
|
||||
|
@ -281,19 +281,19 @@ public class plasmaWordIndexDistribution {
|
|||
}
|
||||
}
|
||||
}
|
||||
// now delete all entries that have no url entry
|
||||
hashIter = unknownURLEntries.iterator();
|
||||
while (hashIter.hasNext()) {
|
||||
indexEntity.removeEntry((String) hashIter.next(), true);
|
||||
}
|
||||
// use what's remaining
|
||||
log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());
|
||||
tmpEntities.add(tmpEntity);
|
||||
} catch (kelondroException e) {
|
||||
log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash());
|
||||
e.printStackTrace();
|
||||
try {indexEntity.deleteComplete();} catch (IOException ee) {}
|
||||
}
|
||||
// now delete all entries that have no url entry
|
||||
hashIter = unknownURLEntries.iterator();
|
||||
while (hashIter.hasNext()) {
|
||||
indexEntity.removeEntry((String) hashIter.next(), true);
|
||||
}
|
||||
// use what's remaining
|
||||
log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());
|
||||
tmpEntities.add(tmpEntity);
|
||||
indexEntity.close(); // important: is not closed elsewhere and cannot be deleted afterwards
|
||||
indexEntity = null;
|
||||
}
|
||||
|
|
|
@ -363,14 +363,17 @@ public class yacySeed {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
// set hash into seed code structure
|
||||
dna.put("Hash", this.hash);
|
||||
// generate string representation
|
||||
String s = dna.toString();
|
||||
// reconstruct original: hash is stored external
|
||||
dna.remove("Hash");
|
||||
// return string
|
||||
return s;
|
||||
String s = null;
|
||||
synchronized (dna) {
|
||||
// set hash into seed code structure
|
||||
dna.put("Hash", this.hash);
|
||||
// generate string representation
|
||||
s = dna.toString();
|
||||
// reconstruct original: hash is stored external
|
||||
dna.remove("Hash");
|
||||
// return string
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
public String genSeedStr(String key) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user