mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Design-check, Extension and Refactoring of DHT target position computation:
- two different (but mathematically equivalent) computations of the DHT distance have been consolidated - moved from a 0.0 .. 1.0 double-range position computation to a 0 .. Long.MAX_VALUE range for DHT targets - added a fast long-to-hash computation - high-precision target computation of gaps for new peers - added a new target computation for horizontal and vertical DHT targets (not yet in use) - old horizontal-only DHT targets will be upward compatible with the new horizontal and vertical DHT positions git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5318 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
dd27ce7216
commit
d014b2728a
|
@ -43,7 +43,6 @@ import de.anomic.tools.nxTools;
|
|||
import de.anomic.xml.RSSFeed;
|
||||
import de.anomic.xml.RSSMessage;
|
||||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyDHTAction;
|
||||
import de.anomic.yacy.yacyNetwork;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
|
||||
|
@ -193,7 +192,7 @@ public final class transferRWI {
|
|||
if ((wordhashes.length == 0) || (received == 0)) {
|
||||
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs");
|
||||
} else {
|
||||
final double avdist = (yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[0]) + yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[received - 1])) / 2.0;
|
||||
final long avdist = (yacySeed.dhtDistance(wordhashes[0], sb.webIndex.seedDB.mySeed()) + yacySeed.dhtDistance(wordhashes[received - 1], sb.webIndex.seedDB.mySeed())) / 2;
|
||||
sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs");
|
||||
RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", requesting " + unknownURL.size() + " URLs, blocked " + blocked, "", ""));
|
||||
}
|
||||
|
|
|
@ -263,6 +263,16 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
|
|||
return c;
|
||||
}
|
||||
|
||||
public final byte[] uncardinal(long c) {
|
||||
c = c >> 3;
|
||||
byte[] b = new byte[10];
|
||||
for (int p = 9; p >= 0; p--) {
|
||||
b[p] = (byte) alpha[(int) (c & 0x3fL)];
|
||||
c = c >> 6;
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
public final long cardinal(final byte[] key) {
|
||||
if (this.zero == null) return cardinalI(key);
|
||||
final long zeroCardinal = cardinalI(this.zero);
|
||||
|
@ -373,9 +383,10 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
|
|||
}
|
||||
|
||||
public static void main(final String[] s) {
|
||||
// java -classpath classes de.anomic.kelondro.kelondroBase64Order
|
||||
final kelondroBase64Order b64 = new kelondroBase64Order(true, true);
|
||||
if (s.length == 0) {
|
||||
System.out.println("usage: -[ec|dc|es|ds|s2m] <arg>");
|
||||
System.out.println("usage: -[ec|dc|es|ds|clcn] <arg>");
|
||||
System.exit(0);
|
||||
}
|
||||
if (s[0].equals("-ec")) {
|
||||
|
@ -394,5 +405,13 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
|
|||
// generate a b64 decoding from a given string
|
||||
System.out.println(b64.decodeString(s[1], ""));
|
||||
}
|
||||
if (s[0].equals("-cl")) {
|
||||
// return the cardinal of a given string as long value with the enhanced encoder
|
||||
System.out.println(kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes()));
|
||||
}
|
||||
if (s[0].equals("-cn")) {
|
||||
// return the cardinal of a given string as normalized float 0 .. 1 with the enhanced encoder
|
||||
System.out.println(((double) kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes())) / ((double) Long.MAX_VALUE));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order;
|
|||
import de.anomic.kelondro.kelondroException;
|
||||
import de.anomic.server.serverCodings;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.yacy.yacyDHTAction;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import de.anomic.yacy.yacySeedDB;
|
||||
|
||||
public class plasmaDHTChunk {
|
||||
|
@ -112,7 +112,7 @@ public class plasmaDHTChunk {
|
|||
this.log = log;
|
||||
this.wordIndex = wordIndex;
|
||||
this.startPointHash = selectTransferStart();
|
||||
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash));
|
||||
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
|
||||
selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
|
||||
|
||||
// count the indexes, can be smaller as expected
|
||||
|
@ -130,7 +130,7 @@ public class plasmaDHTChunk {
|
|||
try {
|
||||
this.log = log;
|
||||
this.wordIndex = wordIndex;
|
||||
if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash));
|
||||
if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
|
||||
selectTransferContainers(startHash, minCount, maxCount, maxtime);
|
||||
|
||||
// count the indexes, can be smaller as expected
|
||||
|
@ -145,27 +145,7 @@ public class plasmaDHTChunk {
|
|||
}
|
||||
|
||||
private String selectTransferStart() {
|
||||
String startPointHash;
|
||||
// first try to select with increasing probality a good start point
|
||||
final double minimumDistance = ((double) peerRedundancy) / ((double) wordIndex.seedDB.sizeConnected());
|
||||
double d, bestDistance = 0.0;
|
||||
String bestHash = null;
|
||||
for (int i = wordIndex.seedDB.sizeConnected() / 8; i > 0; i--) {
|
||||
startPointHash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(i + System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
|
||||
d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, startPointHash);
|
||||
if (d > (minimumDistance + ((double) i / (double) 10))) {
|
||||
return startPointHash;
|
||||
}
|
||||
if (d > bestDistance) {
|
||||
bestDistance = d;
|
||||
bestHash = startPointHash;
|
||||
}
|
||||
}
|
||||
// if that fails, take simply the best start point
|
||||
if (bestHash == null) {
|
||||
return wordIndex.seedDB.mySeed().hash.substring(0, 11) + "z";
|
||||
}
|
||||
return bestHash;
|
||||
return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
|
||||
}
|
||||
|
||||
private void selectTransferContainers(final String hash, final int mincount, final int maxcount, final int maxtime) throws InterruptedException {
|
||||
|
@ -198,7 +178,7 @@ public class plasmaDHTChunk {
|
|||
int wholesize;
|
||||
|
||||
urlCache = new HashMap<String, indexURLReference>();
|
||||
final double maximumDistance = ((double) peerRedundancy * 2) / (wordIndex.seedDB.sizeConnected());
|
||||
final long maximumDistanceLong = Long.MAX_VALUE / wordIndex.seedDB.sizeConnected() * peerRedundancy * 2;
|
||||
final long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
|
||||
while (
|
||||
(maxcount > refcount) &&
|
||||
|
@ -206,7 +186,7 @@ public class plasmaDHTChunk {
|
|||
((container = indexContainerIterator.next()) != null) &&
|
||||
(container.size() > 0) &&
|
||||
((tmpContainers.size() == 0) ||
|
||||
(yacyDHTAction.dhtDistance(container.getWordHash(), tmpContainers.get(0).getWordHash()) < maximumDistance)) &&
|
||||
(Math.abs(yacySeed.dhtPosition(container.getWordHash()) - yacySeed.dhtPosition(tmpContainers.get(0).getWordHash())) < maximumDistanceLong)) &&
|
||||
(System.currentTimeMillis() < timeout)
|
||||
) {
|
||||
// check for interruption
|
||||
|
|
|
@ -123,14 +123,12 @@ public class plasmaGrafics {
|
|||
final int cx = eventPicture.getWidth() / 2;
|
||||
final int cy = eventPicture.getHeight() / 2;
|
||||
|
||||
String hash;
|
||||
int angle;
|
||||
|
||||
// draw in the primary search peers
|
||||
for (int j = 0; j < primarySearches.length; j++) {
|
||||
eventPicture.setColor((primarySearches[j].isAlive()) ? ymageMatrix.RED : ymageMatrix.GREEN);
|
||||
hash = primarySearches[j].target().hash;
|
||||
angle = (int) (360 * yacySeed.dhtPosition(hash));
|
||||
angle = (int) (360.0 * (((double) primarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE)));
|
||||
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
|
||||
}
|
||||
|
||||
|
@ -138,8 +136,7 @@ public class plasmaGrafics {
|
|||
if (secondarySearches != null) {
|
||||
for (int j = 0; j < secondarySearches.length; j++) {
|
||||
eventPicture.setColor((secondarySearches[j].isAlive()) ? ymageMatrix.RED : ymageMatrix.GREEN);
|
||||
hash = secondarySearches[j].target().hash;
|
||||
angle = (int) (360 * yacySeed.dhtPosition(hash));
|
||||
angle = (int) (360.0 * (((double) secondarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE)));
|
||||
eventPicture.arcLine(cx, cy, cr - 10, cr, angle - 1);
|
||||
eventPicture.arcLine(cx, cy, cr - 10, cr, angle + 1);
|
||||
}
|
||||
|
@ -150,8 +147,7 @@ public class plasmaGrafics {
|
|||
final Iterator<String> i = query.queryHashes.iterator();
|
||||
eventPicture.setColor(ymageMatrix.GREY);
|
||||
while (i.hasNext()) {
|
||||
hash = i.next();
|
||||
angle = (int) (360 * yacySeed.dhtPosition(hash));
|
||||
angle = (int) (360.0 * (((double) yacySeed.dhtPosition(i.next())) / ((double) Long.MAX_VALUE)));
|
||||
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
|
||||
}
|
||||
|
||||
|
@ -249,7 +245,7 @@ public class plasmaGrafics {
|
|||
final String name = seed.getName().toUpperCase() /*+ ":" + seed.hash + ":" + (((double) ((int) (100 * (((double) yacySeed.dhtPosition(seed.hash)) / ((double) yacySeed.maxDHTDistance))))) / 100.0)*/;
|
||||
if (name.length() < shortestName) shortestName = name.length();
|
||||
if (name.length() > longestName) longestName = name.length();
|
||||
final int angle = (int) (360 * seed.dhtPosition());
|
||||
final int angle = (int) (360.0 * (((double) seed.dhtPosition()) / ((double) Long.MAX_VALUE)));
|
||||
//System.out.println("Seed " + seed.hash + " has distance " + seed.dhtDistance() + ", angle = " + angle);
|
||||
int linelength = 20 + outerradius * (20 * (name.length() - shortestName) / (longestName - shortestName) + Math.abs(seed.hash.hashCode() % 20)) / 60;
|
||||
if (linelength > outerradius) linelength = outerradius;
|
||||
|
|
|
@ -49,7 +49,6 @@ import de.anomic.kelondro.kelondroSortStore;
|
|||
import de.anomic.plasma.plasmaSnippetCache.MediaSnippet;
|
||||
import de.anomic.server.serverProfiling;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.yacy.yacyDHTAction;
|
||||
import de.anomic.yacy.yacySearch;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
|
@ -174,7 +173,7 @@ public final class plasmaSearchEvent {
|
|||
final Iterator<Map.Entry<String, indexContainer>> ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator();
|
||||
Map.Entry<String, indexContainer> entry;
|
||||
int maxcount = -1;
|
||||
double mindhtdistance = 1.1, d;
|
||||
long mindhtdistance = Long.MAX_VALUE, l;
|
||||
String wordhash;
|
||||
while (ci.hasNext()) {
|
||||
entry = ci.next();
|
||||
|
@ -185,10 +184,10 @@ public final class plasmaSearchEvent {
|
|||
IAmaxcounthash = wordhash;
|
||||
maxcount = container.size();
|
||||
}
|
||||
d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, wordhash);
|
||||
if (d < mindhtdistance) {
|
||||
l = yacySeed.dhtDistance(wordhash, wordIndex.seedDB.mySeed());
|
||||
if (l < mindhtdistance) {
|
||||
// calculate the word hash that is closest to our dht position
|
||||
mindhtdistance = d;
|
||||
mindhtdistance = l;
|
||||
IAneardhthash = wordhash;
|
||||
}
|
||||
IACount.put(wordhash, Integer.valueOf(container.size()));
|
||||
|
|
|
@ -2005,8 +2005,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
|
||||
try {
|
||||
// find a list of DHT-peers
|
||||
final double maxDist = 0.3;
|
||||
final ArrayList<yacySeed> seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, Math.min(8, (int) (this.webIndex.seedDB.sizeConnected() * maxDist)), dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash(), maxDist);
|
||||
final ArrayList<yacySeed> seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, 9, dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash());
|
||||
if (seeds.size() < peerCount) {
|
||||
log.logWarning("found not enough (" + seeds.size() + ") peers for distribution for dhtchunk [" + dhtChunk.firstContainer().getWordHash() + " .. " + dhtChunk.lastContainer().getWordHash() + "]");
|
||||
return false;
|
||||
|
|
|
@ -479,7 +479,7 @@ public final class yacyClient {
|
|||
try {
|
||||
result = nxTools.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000), "UTF-8");
|
||||
} catch (final IOException e) {
|
||||
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(target.hash, wordhashes.substring(0, 12)));
|
||||
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacySeed.dhtDistance(wordhashes.substring(0, 12), target));
|
||||
//yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
|
||||
return null;
|
||||
}
|
||||
|
@ -492,8 +492,7 @@ public final class yacyClient {
|
|||
+ " (zero response), score="
|
||||
+ target.selectscore
|
||||
+ ", DHTdist="
|
||||
+ yacyDHTAction.dhtDistance(target.hash, wordhashes
|
||||
.substring(0, 12)));
|
||||
+ yacySeed.dhtDistance(wordhashes.substring(0, 12), target));
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -653,9 +652,7 @@ public final class yacyClient {
|
|||
+ ", score="
|
||||
+ target.selectscore
|
||||
+ ", DHTdist="
|
||||
+ ((wordhashes.length() < 12) ? "void" : Double
|
||||
.toString(yacyDHTAction.dhtDistance(target.hash,
|
||||
wordhashes.substring(0, 12))))
|
||||
+ ((wordhashes.length() < 12) ? "void" : yacySeed.dhtDistance(wordhashes.substring(0, 12), target))
|
||||
+ ", searchtime=" + searchtime + ", netdelay="
|
||||
+ (totalrequesttime - searchtime) + ", references="
|
||||
+ result.get("references"));
|
||||
|
|
|
@ -302,35 +302,13 @@ public class yacyDHTAction {
|
|||
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash) {
|
||||
if (seedDB == null) return false;
|
||||
if (seedDB.mySeed().isPotential()) return false;
|
||||
final double distance = dhtDistance(seedDB.mySeed().hash, wordhash);
|
||||
final double max = 1.2 / seedDB.sizeConnected();
|
||||
final long distance = yacySeed.dhtDistance(wordhash, seedDB.mySeed());
|
||||
final long max = Long.MAX_VALUE / seedDB.sizeConnected() * 2;
|
||||
//System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max);
|
||||
return (distance > 0) && (distance <= max);
|
||||
}
|
||||
|
||||
public static double dhtDistance(final String peer, final String word) {
|
||||
// the dht distance is a positive value between 0 and 1
|
||||
// if the distance is small, the word more probably belongs to the peer
|
||||
final double d = hashDistance(peer, word);
|
||||
if (d > 0) {
|
||||
return d; // case where the word is 'before' the peer
|
||||
}
|
||||
return 1 + d; // wrap-around case
|
||||
}
|
||||
|
||||
private static double hashDistance(final String from, final String to) {
|
||||
// computes the distance between two hashes.
|
||||
// the maximum distance between two hashes is 1, the minimum -1
|
||||
// this can be used like "from - to"
|
||||
// the result is positive if from > to
|
||||
assert (from != null);
|
||||
assert (to != null);
|
||||
assert (from.length() == 12) : "from.length = " + from.length() + ", from = " + from;
|
||||
assert (to.length() == 12) : "to.length = " + to.length() + ", to = " + to;
|
||||
return ((double) (kelondroBase64Order.enhancedCoder.cardinal(from.getBytes()) - kelondroBase64Order.enhancedCoder.cardinal(to.getBytes()))) / ((double) Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
public synchronized ArrayList<yacySeed> getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey, final double maxDist) {
|
||||
public synchronized ArrayList<yacySeed> getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey) {
|
||||
// find a list of DHT-peers
|
||||
assert firstKey != null;
|
||||
assert lastKey != null;
|
||||
|
@ -347,7 +325,7 @@ public class yacyDHTAction {
|
|||
//double ownDistance = Math.min(yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, firstKey), yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, lastKey));
|
||||
//double maxDistance = Math.min(ownDistance, maxDist);
|
||||
|
||||
double firstdist, lastdist;
|
||||
long firstdist, lastdist;
|
||||
Iterator<yacySeed> e = this.getAcceptRemoteIndexSeeds(lastKey);
|
||||
final TreeSet<String> doublecheck = new TreeSet<String>(kelondroBase64Order.enhancedComparator);
|
||||
int maxloop = Math.min(100, seedDB.sizeConnected()); // to ensure termination
|
||||
|
@ -355,11 +333,9 @@ public class yacyDHTAction {
|
|||
while ((e.hasNext()) && (seeds.size() < (primaryPeerCount + reservePeerCount)) && (maxloop-- > 0)) {
|
||||
seed = e.next();
|
||||
if (seed == null || seed.hash == null) continue;
|
||||
firstdist = yacyDHTAction.dhtDistance(seed.hash, firstKey);
|
||||
lastdist = yacyDHTAction.dhtDistance(seed.hash, lastKey);
|
||||
if (lastdist > maxDist) {
|
||||
if (log != null && yacyCore.log.isFine()) log.logFine("Discarded too distant DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);
|
||||
} else if (doublecheck.contains(seed.hash)) {
|
||||
firstdist = yacySeed.dhtDistance(firstKey, seed);
|
||||
lastdist = yacySeed.dhtDistance(lastKey, seed);
|
||||
if (doublecheck.contains(seed.hash)) {
|
||||
if (log != null && yacyCore.log.isFine()) log.logFine("Discarded double DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);
|
||||
} else {
|
||||
if (log != null) log.logInfo("Selected " + ((seeds.size() < primaryPeerCount) ? "primary" : "reserve") + " DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);
|
||||
|
|
|
@ -187,7 +187,7 @@ public class yacySearch extends Thread {
|
|||
Iterator<yacySeed> dhtEnum;
|
||||
int c;
|
||||
String wordhash;
|
||||
double distance;
|
||||
long distance;
|
||||
Iterator<String> iter = wordhashes.iterator();
|
||||
while (iter.hasNext()) {
|
||||
wordhash = iter.next();
|
||||
|
@ -196,8 +196,7 @@ public class yacySearch extends Thread {
|
|||
while (dhtEnum.hasNext() && c > 0) {
|
||||
seed = dhtEnum.next();
|
||||
if (seed == null || seed.hash == null) continue;
|
||||
distance = yacyDHTAction.dhtDistance(seed.hash, wordhash);
|
||||
if (distance > 0.2) continue; // catch bug in peer selection
|
||||
distance = yacySeed.dhtDistance(wordhash, seed);
|
||||
if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
|
||||
if (serverLog.isFine("PLASMA")) serverLog.logFine("PLASMA", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/" + distance + " for wordhash " + wordhash + ", score " + c);
|
||||
ranking.addScore(seed.hash, c--);
|
||||
|
|
|
@ -668,27 +668,122 @@ public class yacySeed implements Cloneable {
|
|||
return type.equals(yacySeed.PEERTYPE_SENIOR) || type.equals(yacySeed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
||||
public static final long minDHTNumber = kelondroBase64Order.enhancedCoder.cardinal(kelondroBase64Order.zero(12));
|
||||
public static final long maxDHTDistance = Long.MAX_VALUE;
|
||||
|
||||
public double dhtPosition() {
|
||||
// normalized to 1.0
|
||||
public final long dhtPosition() {
|
||||
// normalized to Long.MAX_VALUE
|
||||
return dhtPosition(this.hash);
|
||||
}
|
||||
|
||||
public static double dhtPosition(final String ahash) {
|
||||
private final static double dhtPositionDouble(final String wordHash) {
|
||||
// normalized to 1.0
|
||||
return ((double) kelondroBase64Order.enhancedCoder.cardinal(ahash.getBytes())) / ((double) maxDHTDistance);
|
||||
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
|
||||
assert c != Long.MAX_VALUE;
|
||||
if (c == Long.MAX_VALUE) return 0.999999999999;
|
||||
return ((double) c) / ((double) Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
public final static double dhtDistance(final String from, final String to) {
|
||||
// computes a virtual distance, the result must be set in relation to maxDHTDistace
|
||||
// if the distance is small, this peer is more responsible for that word hash
|
||||
// if the distance is big, this peer is less responsible for that word hash
|
||||
if (from == null) return dhtPosition(to);
|
||||
final double fromPos = dhtPosition(from);
|
||||
final double toPos = dhtPosition(to);
|
||||
return (fromPos <= toPos) ? (toPos - fromPos) : (1.0 - fromPos + toPos);
|
||||
public final static long dhtPosition(final String wordHash) {
|
||||
// normalized to Long.MAX_VALUE
|
||||
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
|
||||
assert c != Long.MAX_VALUE;
|
||||
if (c == Long.MAX_VALUE) return Long.MAX_VALUE - 1;
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate the DHT position for horizontal and vertical performance scaling:
|
||||
* horizontal: scale with number of words
|
||||
* vertical: scale with number of references for every word
|
||||
* The vertical scaling is selected using the corresponding reference hash, the url hash
|
||||
* This has the effect that every vertical position accumulates references for the same url
|
||||
* and the urls are not spread over all positions of the DHT. To use this effect, the
|
||||
* horizontal DHT position must be normed to a 'rest' value of a partition size
|
||||
* This method is compatible to the classic DHT computation as always one of the vertical
|
||||
* DHT position corresponds to the classic position.
|
||||
* @param wordHash, the hash of the RWI
|
||||
* @param partitions, the number of partitions should be computed with partitions = 2**n, n = scaling factor
|
||||
* @param urlHash, the hash of a reference
|
||||
* @return a double in the range 0 .. 1.0 (including 0, excluding 1.0), the DHT position
|
||||
*/
|
||||
private final static double dhtPositionDouble(final String wordHash, final String urlHash, final int e) {
|
||||
assert wordHash != null;
|
||||
assert urlHash != null;
|
||||
if (urlHash == null) return dhtPositionDouble(wordHash);
|
||||
// calculate the primary DHT position:
|
||||
// this is done first using the 'classic' dht position and then
|
||||
// calculation an alternative 'first' position considering the partition size
|
||||
// because of the number of partitions, the 'original' position is reached as one of the
|
||||
// alternative dht positions within the partitions
|
||||
double primary = dhtPositionDouble(wordHash); // the hash position for horizontal performance scaling
|
||||
// the number of partitions is 2 ** e, the partitions may grow exponentially (every time it is doubled)
|
||||
double partitions = (double) (1L << e);
|
||||
// modification of the primary position using the partitions to create a normalization:
|
||||
double normalization = Math.floor(primary * partitions) / partitions;
|
||||
// calculate the shift: the alternative position for vertical performance scaling
|
||||
double shift = Math.floor(dhtPositionDouble(urlHash) * partitions) / partitions;
|
||||
// the final position is the primary, normalized position plus the shift
|
||||
double p = primary - normalization + shift;
|
||||
// one of the possible shift positions points to the original dht position:
|
||||
// this is where the shift is equal to the normalization, when
|
||||
// Math.floor(dhtPosition(wordHash) * partitions) == Math.floor(dhtPosition(urlHash) * partitions)
|
||||
assert p < 1.0 : "p = " + p; // because of the normalization never an overflow should occur
|
||||
assert p >= 0.0 : "p = " + p;
|
||||
return (p < 1.0) ? p : p - 1.0;
|
||||
}
|
||||
|
||||
public final static long dhtPosition(final String wordHash, final String urlHash, final int e) {
|
||||
assert wordHash != null;
|
||||
assert urlHash != null;
|
||||
if (urlHash == null || e < 1) return dhtPosition(wordHash);
|
||||
// the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
|
||||
assert e > 0;
|
||||
long partitionMask = (1L << (Long.SIZE - 1 - e)) - 1L;
|
||||
return (dhtPosition(wordHash) & partitionMask) | (dhtPosition(urlHash) & ~partitionMask);
|
||||
}
|
||||
|
||||
/**
|
||||
* compute all vertical DHT positions for a given word
|
||||
* @param wordHash, the hash of the word
|
||||
* @param partitions, the number of partitions of the DHT
|
||||
* @return a vector of long values, the possible DHT positions
|
||||
*/
|
||||
private final static double[] dhtPositionsDouble(final String wordHash, final int e) {
|
||||
assert wordHash != null;
|
||||
int partitions = 1 << e;
|
||||
double[] d = new double[partitions];
|
||||
double primary = dhtPositionDouble(wordHash);
|
||||
double partitionSize = 1.0 / (double) partitions;
|
||||
d[0] = primary - Math.floor(primary * partitions) / partitions;
|
||||
for (int i = 1; i < partitions; i++) {
|
||||
d[i] = d[i - 1] + partitionSize;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
public final static long[] dhtPositions(final String wordHash, final int e) {
|
||||
assert wordHash != null;
|
||||
int partitions = 1 << e;
|
||||
long[] l = new long[partitions];
|
||||
long partitionSize = 1L << (Long.SIZE - 1 - e);
|
||||
l[0] = dhtPosition(wordHash) & (partitionSize - 1L);
|
||||
for (int i = 1; i < partitions; i++) {
|
||||
l[i] = l[i - 1] + partitionSize;
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
public final static long dhtDistance(final String word, final yacySeed peer) {
|
||||
return dhtDistance(word, peer.hash);
|
||||
}
|
||||
|
||||
private final static long dhtDistance(final String from, final String to) {
|
||||
// the dht distance is a positive value between 0 and 1
|
||||
// if the distance is small, the word more probably belongs to the peer
|
||||
assert to != null;
|
||||
assert from != null;
|
||||
final long toPos = dhtPosition(to);
|
||||
final long fromPos = dhtPosition(from);
|
||||
final long d = toPos - fromPos;
|
||||
return (d >= 0) ? d : (d + Long.MAX_VALUE) + 1;
|
||||
}
|
||||
|
||||
private static String bestGap(final yacySeedDB seedDB) {
|
||||
|
@ -697,7 +792,7 @@ public class yacySeed implements Cloneable {
|
|||
return randomHash();
|
||||
}
|
||||
// find gaps
|
||||
final TreeMap<Double, String> gaps = hashGaps(seedDB);
|
||||
final TreeMap<Long, String> gaps = hashGaps(seedDB);
|
||||
|
||||
// take one gap; prefer biggest but take also another smaller by chance
|
||||
String interval = null;
|
||||
|
@ -709,19 +804,18 @@ public class yacySeed implements Cloneable {
|
|||
if (interval == null) return randomHash();
|
||||
|
||||
// find dht position and size of gap
|
||||
final double gapsize = dhtDistance(interval.substring(0, 12), interval.substring(12));
|
||||
assert gapsize >= 0.0;
|
||||
double gappos = dhtPosition(interval.substring(0, 12)) + (gapsize / 2);
|
||||
if (gappos >= 1.0) gappos = gappos - 1.0; // fix overflow; can only occur for gap at end
|
||||
final long gaphalf = dhtDistance(interval.substring(0, 12), interval.substring(12)) >> 1;
|
||||
long p = dhtPosition(interval.substring(0, 12));
|
||||
long gappos = (Long.MAX_VALUE - p >= gaphalf) ? p + gaphalf : (p - Long.MAX_VALUE) + gaphalf;
|
||||
return positionToHash(gappos);
|
||||
}
|
||||
|
||||
private static TreeMap<Double, String> hashGaps(final yacySeedDB seedDB) {
|
||||
final TreeMap<Double, String>gaps = new TreeMap<Double, String>();
|
||||
private static TreeMap<Long, String> hashGaps(final yacySeedDB seedDB) {
|
||||
final TreeMap<Long, String>gaps = new TreeMap<Long, String>();
|
||||
if (seedDB == null) return gaps;
|
||||
|
||||
final Iterator<yacySeed> i = seedDB.seedsConnected(true, false, null, (float) 0.0);
|
||||
double d;
|
||||
long l;
|
||||
yacySeed s0 = null, s1, first = null;
|
||||
while (i.hasNext()) {
|
||||
s1 = i.next();
|
||||
|
@ -730,19 +824,14 @@ public class yacySeed implements Cloneable {
|
|||
first = s0;
|
||||
continue;
|
||||
}
|
||||
if (s0.hash.equals("fF99P8dMio7M")) {
|
||||
System.out.print(0);
|
||||
}
|
||||
d = dhtDistance(s0.hash, s1.hash);
|
||||
assert d >= 0.0;
|
||||
gaps.put(d, s0.hash + s1.hash);
|
||||
l = dhtDistance(s0.hash, s1.hash);
|
||||
gaps.put(l, s0.hash + s1.hash);
|
||||
s0 = s1;
|
||||
}
|
||||
// compute also the last gap
|
||||
if ((first != null) && (s0 != null)) {
|
||||
d = dhtDistance(s0.hash, first.hash);
|
||||
assert d >= 0.0;
|
||||
gaps.put(d, s0.hash + first.hash);
|
||||
l = dhtDistance(s0.hash, first.hash);
|
||||
gaps.put(l, s0.hash + first.hash);
|
||||
}
|
||||
return gaps;
|
||||
}
|
||||
|
@ -752,19 +841,13 @@ public class yacySeed implements Cloneable {
|
|||
assert t >= 0.0 : "t = " + t;
|
||||
assert t < 1.0 : "t = " + t;
|
||||
|
||||
// now calculate a hash that is closest to the best position
|
||||
double d, bestD = Double.MAX_VALUE;
|
||||
final int tries = 128;
|
||||
String hash, bestHash = null;
|
||||
for (int v = 0; v < tries; v++) {
|
||||
hash = randomHash();
|
||||
d = dhtPosition(hash);
|
||||
if (Math.abs(d - t) < bestD) {
|
||||
bestD = Math.abs(d - t);
|
||||
bestHash = hash;
|
||||
return new String(kelondroBase64Order.enhancedCoder.uncardinal((long) (((double) Long.MAX_VALUE) * t))) + "AA";
|
||||
}
|
||||
}
|
||||
return bestHash;
|
||||
|
||||
private static String positionToHash(final long l) {
|
||||
// transform the position of a peer position into a close peer hash
|
||||
|
||||
return new String(kelondroBase64Order.enhancedCoder.uncardinal(l)) + "AA";
|
||||
}
|
||||
|
||||
public static yacySeed genLocalSeed(final yacySeedDB db) {
|
||||
|
@ -923,4 +1006,46 @@ public class yacySeed implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFn76
|
||||
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3
|
||||
// test the DHT position calculation
|
||||
String wordHash = args[0];
|
||||
double dhtd;
|
||||
long dhtl;
|
||||
int partitionExponent = 0;
|
||||
if (args.length == 3) {
|
||||
// the horizontal and vertical position calculation
|
||||
String urlHash = args[1];
|
||||
partitionExponent = Integer.parseInt(args[2]);
|
||||
dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
|
||||
dhtl = dhtPosition(wordHash, urlHash, partitionExponent);
|
||||
} else {
|
||||
// only a horizontal position calculation
|
||||
dhtd = dhtPositionDouble(wordHash);
|
||||
dhtl = dhtPosition(wordHash);
|
||||
}
|
||||
System.out.println("DHT Double = " + dhtd);
|
||||
System.out.println("DHT Long = " + dhtl);
|
||||
System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE));
|
||||
System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
|
||||
System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
|
||||
System.out.println("DHT as b64 from Long = " + positionToHash(dhtl));
|
||||
|
||||
System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: ");
|
||||
double[] d = dhtPositionsDouble(wordHash, partitionExponent);
|
||||
for (int i = 0; i < d.length; i++) {
|
||||
if (i > 0) System.out.print(", ");
|
||||
System.out.print(positionToHash(d[i]));
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : ");
|
||||
long[] l = dhtPositions(wordHash, partitionExponent);
|
||||
for (int i = 0; i < l.length; i++) {
|
||||
if (i > 0) System.out.print(", ");
|
||||
System.out.print(positionToHash(l[i]));
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user