git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@529 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2005-08-13 21:41:22 +00:00
parent da81dcd66a
commit c8a7a85ce2
3 changed files with 45 additions and 35 deletions

View File

@ -1047,13 +1047,19 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String reason = null; // failure reason
// strange error
// strange errors
if (nexturlString == null) {
reason = "denied_(url_null)";
log.logError("Wrong URL in stackCrawl: url=null");
return reason;
}
/*
if (profile == null) {
reason = "denied_(profile_null)";
log.logError("Wrong Profile for stackCrawl: profile=null");
return reason;
}
*/
URL nexturl = null;
if ((initiatorHash == null) || (initiatorHash.length() == 0)) initiatorHash = plasmaURL.dummyHash;
String referrerHash = plasmaURL.urlHash(referrerString);
@ -1066,7 +1072,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// filter deny
if ((currentdepth > 0) && (!(nexturlString.matches(profile.generalFilter())))) {
if ((currentdepth > 0) && (profile != null) && (!(nexturlString.matches(profile.generalFilter())))) {
reason = "denied_(does_not_match_filter)";
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
name, reason, new bitfield(plasmaURL.urlFlagLength), false);
@ -1082,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
// deny post properties
if ((plasmaHTCache.isPOST(nexturlString)) && (!(profile.crawlingQ()))) {
if ((plasmaHTCache.isPOST(nexturlString)) && (profile != null) && (!(profile.crawlingQ()))) {
reason = "denied_(post_url)";
urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
name, reason, new bitfield(plasmaURL.urlFlagLength), false);
@ -1102,6 +1108,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// store information
boolean local = ((initiatorHash.equals(plasmaURL.dummyHash)) || (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)));
boolean global =
(profile != null) &&
(profile.remoteIndexing()) /* granted */ &&
(currentdepth == profile.generalDepth()) /* leaf node */ &&
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
@ -1113,7 +1120,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
loadDate, /* load date */
referrerHash, /* last url in crawling queue */
name, /* the anchor name */
profile.handle(),
(profile == null) ? null : profile.handle(),
currentdepth, /*depth so far*/
0, /*anchors, default value */
0, /*forkfactor, default value */

View File

@ -227,10 +227,10 @@ public class plasmaWordIndexDistribution {
indexEntity.deleteComplete();
} else if (indexEntity.size() <= count) {
// take the whole entity
// first check if we know all urls
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
try {
// first check if we know all urls
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
while (urlEnum.hasMoreElements()) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
@ -245,26 +245,26 @@ public class plasmaWordIndexDistribution {
}
}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
indexEntity.removeEntry((String) hashIter.next(), false);
}
// use whats remaining
tmpEntities.add(indexEntity);
log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash());
count -= indexEntity.size();
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
indexEntity.removeEntry((String) hashIter.next(), false);
}
// use whats remaining
tmpEntities.add(indexEntity);
log.logDebug("Selected whole index (" + indexEntity.size() + " URLs, " + unknownURLEntries.size() + " not bound) for word " + indexEntity.wordHash());
count -= indexEntity.size();
} else {
// make an on-the-fly entity and insert values
tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
try {
urlEnum = indexEntity.elements(true);
unknownURLEntries = new HashSet();
while ((urlEnum.hasMoreElements()) && (count > 0)) {
indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
lurl = urlPool.loadedURL.getEntry(indexEntry.getUrlHash());
@ -281,19 +281,19 @@ public class plasmaWordIndexDistribution {
}
}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
indexEntity.removeEntry((String) hashIter.next(), true);
}
// use whats remaining
log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());
tmpEntities.add(tmpEntity);
} catch (kelondroException e) {
log.logError("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash());
e.printStackTrace();
try {indexEntity.deleteComplete();} catch (IOException ee) {}
}
// now delete all entries that have no url entry
hashIter = unknownURLEntries.iterator();
while (hashIter.hasNext()) {
indexEntity.removeEntry((String) hashIter.next(), true);
}
// use whats remaining
log.logDebug("Selected partial index (" + tmpEntity.size() + " from " + indexEntity.size() +" URLs, " + unknownURLEntries.size() + " not bound) for word " + tmpEntity.wordHash());
tmpEntities.add(tmpEntity);
indexEntity.close(); // important: is not closed elsewhere and cannot be deleted afterwards
indexEntity = null;
}

View File

@ -363,14 +363,17 @@ public class yacySeed {
}
/**
 * Builds the string representation of this seed.
 *
 * The seed hash is kept outside of {@code dna}, so it is inserted under the
 * key "Hash" only while the string is generated and removed again afterwards.
 * The whole put / toString / remove sequence runs while holding the monitor of
 * {@code dna}, so two concurrent calls of this method cannot interleave and
 * observe or remove each other's temporary "Hash" entry.
 * NOTE(review): other code that touches {@code dna} is only protected if it
 * synchronizes on the same object - confirm against the rest of the class.
 *
 * @return the result of {@code dna.toString()} with the "Hash" entry included
 */
public String toString() {
    synchronized (dna) {
        // set hash into seed code structure
        dna.put("Hash", this.hash);
        try {
            // generate string representation
            return dna.toString();
        } finally {
            // reconstruct original: hash is stored external; done in finally so
            // a failure during serialization cannot leave the entry in dna
            dna.remove("Hash");
        }
    }
}
public String genSeedStr(String key) {