several bugfixes and dht selection / logging improvement

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@531 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2005-08-14 00:57:30 +00:00
parent 3610fe6b3a
commit cd10370992
9 changed files with 80 additions and 42 deletions

View File

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.395
releaseVersion=0.396
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

View File

@ -44,6 +44,7 @@
// if the shell's current path is HTROOT
import java.util.Hashtable;
import java.io.IOException;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
@ -79,40 +80,31 @@ public class query {
// requests about environment
if (obj.equals("wordcount")) {
// the total number of different words in the rwi is returned
prop.put("response", "0"); // dummy response
if (obj.equals("rwiurlcount")) {
// the total number of different urls in the rwi is returned
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
try {
de.anomic.plasma.plasmaWordIndexEntity entity = switchboard.wordIndex.getEntity(env, true);
prop.put("response", entity.size());
entity.close();
} catch (IOException e) {
prop.put("response", -1);
}
return prop;
}
if (obj.equals("rwicount")) {
// return the number of available word indexes
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
prop.put("response", "0"); // dummy response
// return the total number of available word indexes
prop.put("response", switchboard.wordIndex.size());
return prop;
}
if (obj.equals("lurlcount")) {
// return the number of all available l-url's
Hashtable result = switchboard.action("urlcount", null);
//System.out.println("URLCOUNT result = " + ((result == null) ? "NULL" : result.toString()));
prop.put("response", ((result == null) ? "-1" : (String) result.get("urls")));
prop.put("response", switchboard.urlPool.loadedURL.size());
return prop;
}
if (obj.equals("purlcount")) {
// return number of stacked prefetch urls
prop.put("response", "0"); // dummy response
return prop;
}
if (obj.equals("seedcount")) {
// return number of stacked prefetch urls
prop.put("response", "0"); // dummy response
return prop;
}
// requests about requirements
if (obj.equals("wantedlurls")) {

View File

@ -55,6 +55,7 @@ import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyDHTAction;
public class transferRWI {
@ -124,8 +125,10 @@ public class transferRWI {
if (unknownURLs.length() > 0) unknownURLs = unknownURLs.substring(1);
if (wordhashes.length == 0)
switchboard.getLog().logInfo("Received 0 Words from peer " + iam + ", requested " + unknownURL.size() + " URLs");
else
switchboard.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "] from peer " + iam + ", requested " + unknownURL.size() + " URLs");
else {
double avdist = (yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[0]) + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[wordhashes.length - 1])) / 2.0;
switchboard.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "]/" + avdist + " from peer " + iam + ", requested " + unknownURL.size() + " URLs");
}
result = "ok";
} else {
result = "error_not_granted";

View File

@ -416,7 +416,7 @@ public final class plasmaHTCache {
return ((ls.indexOf(".cgi") >= 0) ||
(ls.indexOf(".exe") >= 0) ||
(ls.indexOf(";jsessionid=") >= 0) ||
(ls.indexOf("SESSIONID/") >= 0));
(ls.indexOf("sessionid/") >= 0));
}
public Entry newEntry(Date initDate, int depth, URL url, String name,

View File

@ -1515,15 +1515,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public serverObjects action(String actionName, serverObjects actionInput) {
// perform an action.
// perform an action. (not used)
if (actionName.equals("urlcount")) {
serverObjects result = new serverObjects();
result.put("urls", Integer.toString(urlPool.loadedURL.size()));
return result;
}
// not a correct query
return null;
}

View File

@ -517,11 +517,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// check cache space
if (cache.size() > 0) try {
// pause until space is in the cache
while (cache.size() >= this.maxWords) Thread.sleep(1000);
// pause to get space in the cache (while it is flushed)
if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000));
// slow down if we reach cache limit
long pausetime = java.lang.Math.min(10, 3 * cache.size() / (maxWords + 1));
long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1));
//System.out.println("Pausetime=" + pausetime);
Thread.sleep(pausetime);
} catch (InterruptedException e) {}

View File

@ -13,6 +13,7 @@ import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.server.serverCodings;
import de.anomic.server.logging.serverLog;
import de.anomic.kelondro.kelondroException;
@ -133,8 +134,8 @@ public class plasmaWordIndexDistribution {
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
// collect index
String startPointHash = yacyCore.seedDB.mySeed.hash;
//String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
String startPointHash = selectTransferStart();
log.logDebug("Selected hash " + startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, startPointHash));
Object[] selectResult = selectTransferIndexes(startPointHash, indexCount);
plasmaWordIndexEntity[] indexEntities = (plasmaWordIndexEntity[]) selectResult[0];
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
@ -157,6 +158,7 @@ public class plasmaWordIndexDistribution {
Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash);
String error;
String peerNames = "";
double avdist;
while ((e.hasMoreElements()) && (hc < peerCount)) {
if (closed) {
log.logError("Index distribution interrupted by close, nothing deleted locally.");
@ -166,7 +168,8 @@ public class plasmaWordIndexDistribution {
if (seed != null) {
error = yacyClient.transferIndex(seed, indexEntities, urlCache);
if (error == null) {
log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
avdist = (yacyDHTAction.dhtDistance(seed.hash, indexEntities[0].wordHash()) + yacyDHTAction.dhtDistance(seed.hash, indexEntities[indexEntities.length-1].wordHash())) / 2.0;
log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "]/" + avdist + " to peer " + seed.getName() + ":" + seed.hash + " successfull");
peerNames += ", " + seed.getName();
hc++;
} else {
@ -207,8 +210,21 @@ public class plasmaWordIndexDistribution {
}
}
private String selectTransferStart() {
    // Selects the word hash from which the next index distribution run starts
    // collecting indexes.
    //
    // Returns either a pseudo-random hash derived from the current time, or,
    // if no random candidate is accepted, a hash derived from our own seed hash.
    final String ownHash = yacyCore.seedDB.mySeed.hash;

    // first try to select, with increasing probability, a good start point:
    // the distance a candidate must exceed drops from 0.9 down to 0.1, so
    // each later attempt is more likely to be accepted than the one before
    for (int i = 9; i > 0; i--) {
        // adding i to the timestamp gives a different MD5 input on every pass
        String startPointHash = serverCodings.encodeMD5B64(Long.toString(i + System.currentTimeMillis()), true).substring(2, 2 + yacySeedDB.commonHashLength);
        if (yacyDHTAction.dhtDistance(ownHash, startPointHash) > ((double) i / (double) 10)) return startPointHash;
    }

    // if that fails, take simply the best start point (this is usually avoided,
    // since that leads to always the same target peers).
    // The prefix length is derived from commonHashLength instead of a literal 11,
    // so the fallback hash always has the same length as the random candidates above;
    // hashes of different length cannot be compared by the dht distance function.
    return ownHash.substring(0, yacySeedDB.commonHashLength - 1) + "z";
}
private Object[] /* of {plasmaWordIndexEntity[], HashMap(String, plasmaCrawlLURL.Entry)}*/
selectTransferIndexes(String hash, int count) {
// the hash is a start hash from where the indexes are picked
Vector tmpEntities = new Vector();
String nexthash = "";
try {

View File

@ -380,7 +380,7 @@ public class yacyClient {
} catch (NumberFormatException e) {
searchtime = totalrequesttime;
}
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score " + targetPeer.selectscore + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);

View File

@ -226,4 +226,38 @@ public class yacyDHTAction implements yacyPeerAction {
public void processPeerPing(yacySeed peer) {
}
public static double dhtDistance(String peer, String word) {
    // The dht distance is a positive value between 0 and 1.
    // The smaller the distance, the more probably the word belongs to the peer.
    // A positive signed distance means the word lies 'before' the peer and is
    // used directly; everything else is the wrap-around case and is shifted by 1.
    // Note that a signed distance of exactly 0 (identical hashes, or arguments
    // that hashDistance rejects) is handled by the wrap-around case and gives 1.
    final double signed = hashDistance(peer, word);
    return (signed > 0) ? signed : 1 + signed;
}

private static double hashDistance(String from, String to) {
    // Signed distance between two hashes, usable like "from - to":
    // the result is positive if from > to. Intended range is -1 .. 1.
    // Null, empty or differently sized hashes are not comparable and yield 0.
    if (from == null || to == null) return 0.0;
    final int length = from.length();
    if (length == 0 || to.length() == 0 || length != to.length()) return 0.0;

    // Walk from the last character to the first. Every step scales down what
    // has been accumulated so far, so the leading character carries the most
    // weight and each following one counts 1/maxAtomarDistance of its predecessor.
    double distance = 0.0;
    for (int pos = length - 1; pos >= 0; pos--) {
        distance = hashDistance(from.charAt(pos), to.charAt(pos)) + distance / maxAtomarDistance;
    }
    return distance;
}

// number of character codes in the span from '-' to 'z', both ends included
private static final double maxAtomarDistance = (double) (1 + ((byte) 'z') - ((byte) '-'));

private static double hashDistance(char from, char to) {
    // Signed distance of two single hash characters, scaled by maxAtomarDistance.
    // The distance is a little bit fuzzy, since not all characters between
    // '-' and 'z' are used in a hash.
    if (from >= to) {
        return ((double) (((byte) from) - ((byte) to))) / maxAtomarDistance;
    }
    // from < to: mirror the arguments and flip the sign
    return -(((double) (((byte) to) - ((byte) from))) / maxAtomarDistance);
}
}