mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
several bugfixes and dht selection / logging improvement
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@531 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
3610fe6b3a
commit
cd10370992
|
@ -3,7 +3,7 @@ javacSource=1.4
|
|||
javacTarget=1.4
|
||||
|
||||
# Release Configuration
|
||||
releaseVersion=0.395
|
||||
releaseVersion=0.396
|
||||
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
// if the shell's current path is HTROOT
|
||||
|
||||
import java.util.Hashtable;
|
||||
import java.io.IOException;
|
||||
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
|
@ -79,40 +80,31 @@ public class query {
|
|||
|
||||
// requests about environment
|
||||
|
||||
if (obj.equals("wordcount")) {
|
||||
// the total number of different words in the rwi is returned
|
||||
prop.put("response", "0"); // dummy response
|
||||
return prop;
|
||||
if (obj.equals("rwiurlcount")) {
|
||||
// the total number of different urls in the rwi is returned
|
||||
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
|
||||
try {
|
||||
de.anomic.plasma.plasmaWordIndexEntity entity = switchboard.wordIndex.getEntity(env, true);
|
||||
prop.put("response", entity.size());
|
||||
entity.close();
|
||||
} catch (IOException e) {
|
||||
prop.put("response", -1);
|
||||
}
|
||||
return prop;
|
||||
}
|
||||
|
||||
if (obj.equals("rwicount")) {
|
||||
// return the number of available word indexes
|
||||
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
|
||||
prop.put("response", "0"); // dummy response
|
||||
// return the total number of available word indexes
|
||||
prop.put("response", switchboard.wordIndex.size());
|
||||
return prop;
|
||||
}
|
||||
|
||||
if (obj.equals("lurlcount")) {
|
||||
// return the number of all available l-url's
|
||||
Hashtable result = switchboard.action("urlcount", null);
|
||||
//System.out.println("URLCOUNT result = " + ((result == null) ? "NULL" : result.toString()));
|
||||
prop.put("response", ((result == null) ? "-1" : (String) result.get("urls")));
|
||||
prop.put("response", switchboard.urlPool.loadedURL.size());
|
||||
return prop;
|
||||
}
|
||||
|
||||
if (obj.equals("purlcount")) {
|
||||
// return number of stacked prefetch urls
|
||||
prop.put("response", "0"); // dummy response
|
||||
return prop;
|
||||
}
|
||||
|
||||
if (obj.equals("seedcount")) {
|
||||
// return number of stacked prefetch urls
|
||||
prop.put("response", "0"); // dummy response
|
||||
return prop;
|
||||
}
|
||||
|
||||
|
||||
// requests about requirements
|
||||
|
||||
if (obj.equals("wantedlurls")) {
|
||||
|
|
|
@ -55,6 +55,7 @@ import de.anomic.plasma.plasmaWordIndexEntryContainer;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyDHTAction;
|
||||
|
||||
public class transferRWI {
|
||||
|
||||
|
@ -124,8 +125,10 @@ public class transferRWI {
|
|||
if (unknownURLs.length() > 0) unknownURLs = unknownURLs.substring(1);
|
||||
if (wordhashes.length == 0)
|
||||
switchboard.getLog().logInfo("Received 0 Words from peer " + iam + ", requested " + unknownURL.size() + " URLs");
|
||||
else
|
||||
switchboard.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "] from peer " + iam + ", requested " + unknownURL.size() + " URLs");
|
||||
else {
|
||||
double avdist = (yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[0]) + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[wordhashes.length - 1])) / 2.0;
|
||||
switchboard.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[wordhashes.length - 1] + "]/" + avdist + " from peer " + iam + ", requested " + unknownURL.size() + " URLs");
|
||||
}
|
||||
result = "ok";
|
||||
} else {
|
||||
result = "error_not_granted";
|
||||
|
|
|
@ -416,7 +416,7 @@ public final class plasmaHTCache {
|
|||
return ((ls.indexOf(".cgi") >= 0) ||
|
||||
(ls.indexOf(".exe") >= 0) ||
|
||||
(ls.indexOf(";jsessionid=") >= 0) ||
|
||||
(ls.indexOf("SESSIONID/") >= 0));
|
||||
(ls.indexOf("sessionid/") >= 0));
|
||||
}
|
||||
|
||||
public Entry newEntry(Date initDate, int depth, URL url, String name,
|
||||
|
|
|
@ -1515,15 +1515,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
|
||||
|
||||
public serverObjects action(String actionName, serverObjects actionInput) {
|
||||
// perform an action.
|
||||
// perform an action. (not used)
|
||||
|
||||
if (actionName.equals("urlcount")) {
|
||||
serverObjects result = new serverObjects();
|
||||
result.put("urls", Integer.toString(urlPool.loadedURL.size()));
|
||||
return result;
|
||||
}
|
||||
|
||||
// not a correct query
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
|
@ -517,11 +517,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
|
|||
|
||||
// check cache space
|
||||
if (cache.size() > 0) try {
|
||||
// pause until space is in the cache
|
||||
while (cache.size() >= this.maxWords) Thread.sleep(1000);
|
||||
// pause to get space in the cache (while it is flushed)
|
||||
if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000));
|
||||
|
||||
// slow down if we reach cache limit
|
||||
long pausetime = java.lang.Math.min(10, 3 * cache.size() / (maxWords + 1));
|
||||
long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1));
|
||||
//System.out.println("Pausetime=" + pausetime);
|
||||
Thread.sleep(pausetime);
|
||||
} catch (InterruptedException e) {}
|
||||
|
|
|
@ -13,6 +13,7 @@ import de.anomic.yacy.yacyCore;
|
|||
import de.anomic.yacy.yacySeed;
|
||||
import de.anomic.yacy.yacySeedDB;
|
||||
import de.anomic.yacy.yacyClient;
|
||||
import de.anomic.yacy.yacyDHTAction;
|
||||
import de.anomic.server.serverCodings;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.kelondro.kelondroException;
|
||||
|
@ -133,8 +134,8 @@ public class plasmaWordIndexDistribution {
|
|||
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
|
||||
|
||||
// collect index
|
||||
String startPointHash = yacyCore.seedDB.mySeed.hash;
|
||||
//String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
|
||||
String startPointHash = selectTransferStart();
|
||||
log.logDebug("Selected hash " + startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, startPointHash));
|
||||
Object[] selectResult = selectTransferIndexes(startPointHash, indexCount);
|
||||
plasmaWordIndexEntity[] indexEntities = (plasmaWordIndexEntity[]) selectResult[0];
|
||||
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
|
||||
|
@ -157,6 +158,7 @@ public class plasmaWordIndexDistribution {
|
|||
Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash);
|
||||
String error;
|
||||
String peerNames = "";
|
||||
double avdist;
|
||||
while ((e.hasMoreElements()) && (hc < peerCount)) {
|
||||
if (closed) {
|
||||
log.logError("Index distribution interrupted by close, nothing deleted locally.");
|
||||
|
@ -166,7 +168,8 @@ public class plasmaWordIndexDistribution {
|
|||
if (seed != null) {
|
||||
error = yacyClient.transferIndex(seed, indexEntities, urlCache);
|
||||
if (error == null) {
|
||||
log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
|
||||
avdist = (yacyDHTAction.dhtDistance(seed.hash, indexEntities[0].wordHash()) + yacyDHTAction.dhtDistance(seed.hash, indexEntities[indexEntities.length-1].wordHash())) / 2.0;
|
||||
log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "]/" + avdist + " to peer " + seed.getName() + ":" + seed.hash + " successfull");
|
||||
peerNames += ", " + seed.getName();
|
||||
hc++;
|
||||
} else {
|
||||
|
@ -207,8 +210,21 @@ public class plasmaWordIndexDistribution {
|
|||
}
|
||||
}
|
||||
|
||||
private String selectTransferStart() {
|
||||
String startPointHash;
|
||||
// first try to select with increasing probality a good start point
|
||||
for (int i = 9; i > 0; i--) {
|
||||
startPointHash = serverCodings.encodeMD5B64(Long.toString(i + System.currentTimeMillis()), true).substring(2, 2 + yacySeedDB.commonHashLength);
|
||||
if (yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, startPointHash) > ((double) i / (double) 10)) return startPointHash;
|
||||
}
|
||||
// if that fails, take simply the best start point (this is usually avoided, since that leads to always the same target peers)
|
||||
startPointHash = yacyCore.seedDB.mySeed.hash.substring(0, 11) + "z";
|
||||
return startPointHash;
|
||||
}
|
||||
|
||||
private Object[] /* of {plasmaWordIndexEntity[], HashMap(String, plasmaCrawlLURL.Entry)}*/
|
||||
selectTransferIndexes(String hash, int count) {
|
||||
// the hash is a start hash from where the indexes are picked
|
||||
Vector tmpEntities = new Vector();
|
||||
String nexthash = "";
|
||||
try {
|
||||
|
|
|
@ -380,7 +380,7 @@ public class yacyClient {
|
|||
} catch (NumberFormatException e) {
|
||||
searchtime = totalrequesttime;
|
||||
}
|
||||
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score " + targetPeer.selectscore + "; duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
|
||||
yacyCore.log.logDebug("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
|
||||
return results;
|
||||
} catch (Exception e) {
|
||||
yacyCore.log.logError("yacyClient.search error: '" + targetPeer.get("Name", "anonymous") + "' failed - " + e);
|
||||
|
|
|
@ -226,4 +226,38 @@ public class yacyDHTAction implements yacyPeerAction {
|
|||
|
||||
// Presumably the yacyPeerAction callback for a peer ping (confirm against the
// interface); this implementation has an empty body and takes no action.
public void processPeerPing(yacySeed peer) {
}
|
||||
|
||||
|
||||
|
||||
public static double dhtDistance(String peer, String word) {
|
||||
// the dht distance is a positive value between 0 and 1
|
||||
// if the distance is small, the word more probably belongs to the peer
|
||||
double d = hashDistance(peer, word);
|
||||
if (d > 0) {
|
||||
return d; // case where the word is 'before' the peer
|
||||
} else {
|
||||
return 1 + d; // wrap-around case
|
||||
}
|
||||
}
|
||||
|
||||
private static double hashDistance(String from, String to) {
|
||||
// computes the distance between two hashes.
|
||||
// the maximum distance between two hashes is 1, the minimum -1
|
||||
// this can be used like "from - to"
|
||||
// the result is positive if from > to
|
||||
if ((from == null) || (to == null) ||
|
||||
(from.length() == 0) || (to.length() == 0) ||
|
||||
(from.length() != to.length())) return (double) 0.0;
|
||||
return hashDistance(from.charAt(0), to.charAt(0)) + hashDistance(from.substring(1), to.substring(1)) / maxAtomarDistance;
|
||||
}
|
||||
|
||||
private static final double maxAtomarDistance = (double) (1+ ((byte) 'z') - ((byte) '-'));
|
||||
|
||||
private static double hashDistance(char from, char to) {
|
||||
// the distance is a little bit fuzzy, since not all characters are used in a hash.
|
||||
if (from < to)
|
||||
return -hashDistance(to, from);
|
||||
else
|
||||
return ((double) (((byte) from) - ((byte) to))) / maxAtomarDistance;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user