Design-check, Extension and Refactoring of DHT target position computation:

- two different (but mathematically equivalent) computations of the DHT distance have been consolidated
- moved from a 0.0 .. 1.0 double-range position computation to a 0 .. Long.MAX_VALUE range for DHT targets
- added a fast long-to-hash computation
- high-precision target computation of gaps for new peers
- added a new target computation for horizontal and vertical DHT targets (not yet in use)
- old horizontal-only DHT targets remain upward-compatible with the new horizontal and vertical DHT positions

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5318 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-11-03 00:27:23 +00:00
parent dd27ce7216
commit d014b2728a
10 changed files with 219 additions and 130 deletions

View File

@ -43,7 +43,6 @@ import de.anomic.tools.nxTools;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.yacy.yacyNetwork;
import de.anomic.yacy.yacySeed;
@ -193,7 +192,7 @@ public final class transferRWI {
if ((wordhashes.length == 0) || (received == 0)) {
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs");
} else {
final double avdist = (yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[0]) + yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[received - 1])) / 2.0;
final long avdist = (yacySeed.dhtDistance(wordhashes[0], sb.webIndex.seedDB.mySeed()) + yacySeed.dhtDistance(wordhashes[received - 1], sb.webIndex.seedDB.mySeed())) / 2;
sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs");
RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", requesting " + unknownURL.size() + " URLs, blocked " + blocked, "", ""));
}

View File

@ -263,6 +263,16 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
return c;
}
/**
 * Converts a long value into a 10-byte key built from entries of the alphabet table.
 * The three lowest bits of the value are dropped; the following 60 bits are read as
 * ten groups of 6 bits, each group selecting one alphabet entry. The most significant
 * group is written to index 0, the least significant group to index 9.
 * NOTE(review): presumably the counterpart of cardinal() - confirm against cardinalI.
 * @param c the value to convert
 * @return a new byte array of length 10
 */
public final byte[] uncardinal(long c) {
    final long bits = c >> 3;
    final byte[] key = new byte[10];
    for (int p = 0; p < key.length; p++) {
        // index 9 takes the lowest 6 bits, every index before it the next higher 6 bits
        final int shift = 6 * (key.length - 1 - p);
        key[p] = (byte) alpha[(int) ((bits >> shift) & 0x3fL)];
    }
    return key;
}
public final long cardinal(final byte[] key) {
if (this.zero == null) return cardinalI(key);
final long zeroCardinal = cardinalI(this.zero);
@ -373,9 +383,10 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
}
public static void main(final String[] s) {
// java -classpath classes de.anomic.kelondro.kelondroBase64Order
final kelondroBase64Order b64 = new kelondroBase64Order(true, true);
if (s.length == 0) {
System.out.println("usage: -[ec|dc|es|ds|s2m] <arg>");
System.out.println("usage: -[ec|dc|es|ds|clcn] <arg>");
System.exit(0);
}
if (s[0].equals("-ec")) {
@ -394,5 +405,13 @@ public class kelondroBase64Order extends kelondroAbstractOrder<byte[]> implement
// generate a b64 decoding from a given string
System.out.println(b64.decodeString(s[1], ""));
}
if (s[0].equals("-cl")) {
// return the cardinal of a given string as long value with the enhanced encoder
System.out.println(kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes()));
}
if (s[0].equals("-cn")) {
// return the cardinal of a given string as normalized float 0 .. 1 with the enhanced encoder
System.out.println(((double) kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes())) / ((double) Long.MAX_VALUE));
}
}
}

View File

@ -39,7 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.serverCodings;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
public class plasmaDHTChunk {
@ -112,7 +112,7 @@ public class plasmaDHTChunk {
this.log = log;
this.wordIndex = wordIndex;
this.startPointHash = selectTransferStart();
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash));
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
// count the indexes, can be smaller as expected
@ -130,7 +130,7 @@ public class plasmaDHTChunk {
try {
this.log = log;
this.wordIndex = wordIndex;
if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash));
if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
selectTransferContainers(startHash, minCount, maxCount, maxtime);
// count the indexes, can be smaller as expected
@ -145,27 +145,7 @@ public class plasmaDHTChunk {
}
private String selectTransferStart() {
String startPointHash;
// first try to select with increasing probality a good start point
final double minimumDistance = ((double) peerRedundancy) / ((double) wordIndex.seedDB.sizeConnected());
double d, bestDistance = 0.0;
String bestHash = null;
for (int i = wordIndex.seedDB.sizeConnected() / 8; i > 0; i--) {
startPointHash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(i + System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, startPointHash);
if (d > (minimumDistance + ((double) i / (double) 10))) {
return startPointHash;
}
if (d > bestDistance) {
bestDistance = d;
bestHash = startPointHash;
}
}
// if that fails, take simply the best start point
if (bestHash == null) {
return wordIndex.seedDB.mySeed().hash.substring(0, 11) + "z";
}
return bestHash;
return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
}
private void selectTransferContainers(final String hash, final int mincount, final int maxcount, final int maxtime) throws InterruptedException {
@ -198,7 +178,7 @@ public class plasmaDHTChunk {
int wholesize;
urlCache = new HashMap<String, indexURLReference>();
final double maximumDistance = ((double) peerRedundancy * 2) / (wordIndex.seedDB.sizeConnected());
final long maximumDistanceLong = Long.MAX_VALUE / wordIndex.seedDB.sizeConnected() * peerRedundancy * 2;
final long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
while (
(maxcount > refcount) &&
@ -206,7 +186,7 @@ public class plasmaDHTChunk {
((container = indexContainerIterator.next()) != null) &&
(container.size() > 0) &&
((tmpContainers.size() == 0) ||
(yacyDHTAction.dhtDistance(container.getWordHash(), tmpContainers.get(0).getWordHash()) < maximumDistance)) &&
(Math.abs(yacySeed.dhtPosition(container.getWordHash()) - yacySeed.dhtPosition(tmpContainers.get(0).getWordHash())) < maximumDistanceLong)) &&
(System.currentTimeMillis() < timeout)
) {
// check for interruption

View File

@ -123,14 +123,12 @@ public class plasmaGrafics {
final int cx = eventPicture.getWidth() / 2;
final int cy = eventPicture.getHeight() / 2;
String hash;
int angle;
// draw in the primary search peers
for (int j = 0; j < primarySearches.length; j++) {
eventPicture.setColor((primarySearches[j].isAlive()) ? ymageMatrix.RED : ymageMatrix.GREEN);
hash = primarySearches[j].target().hash;
angle = (int) (360 * yacySeed.dhtPosition(hash));
angle = (int) (360.0 * (((double) primarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE)));
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
}
@ -138,8 +136,7 @@ public class plasmaGrafics {
if (secondarySearches != null) {
for (int j = 0; j < secondarySearches.length; j++) {
eventPicture.setColor((secondarySearches[j].isAlive()) ? ymageMatrix.RED : ymageMatrix.GREEN);
hash = secondarySearches[j].target().hash;
angle = (int) (360 * yacySeed.dhtPosition(hash));
angle = (int) (360.0 * (((double) secondarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE)));
eventPicture.arcLine(cx, cy, cr - 10, cr, angle - 1);
eventPicture.arcLine(cx, cy, cr - 10, cr, angle + 1);
}
@ -150,8 +147,7 @@ public class plasmaGrafics {
final Iterator<String> i = query.queryHashes.iterator();
eventPicture.setColor(ymageMatrix.GREY);
while (i.hasNext()) {
hash = i.next();
angle = (int) (360 * yacySeed.dhtPosition(hash));
angle = (int) (360.0 * (((double) yacySeed.dhtPosition(i.next())) / ((double) Long.MAX_VALUE)));
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
}
@ -249,7 +245,7 @@ public class plasmaGrafics {
final String name = seed.getName().toUpperCase() /*+ ":" + seed.hash + ":" + (((double) ((int) (100 * (((double) yacySeed.dhtPosition(seed.hash)) / ((double) yacySeed.maxDHTDistance))))) / 100.0)*/;
if (name.length() < shortestName) shortestName = name.length();
if (name.length() > longestName) longestName = name.length();
final int angle = (int) (360 * seed.dhtPosition());
final int angle = (int) (360.0 * (((double) seed.dhtPosition()) / ((double) Long.MAX_VALUE)));
//System.out.println("Seed " + seed.hash + " has distance " + seed.dhtDistance() + ", angle = " + angle);
int linelength = 20 + outerradius * (20 * (name.length() - shortestName) / (longestName - shortestName) + Math.abs(seed.hash.hashCode() % 20)) / 60;
if (linelength > outerradius) linelength = outerradius;

View File

@ -49,7 +49,6 @@ import de.anomic.kelondro.kelondroSortStore;
import de.anomic.plasma.plasmaSnippetCache.MediaSnippet;
import de.anomic.server.serverProfiling;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyDHTAction;
import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
@ -174,7 +173,7 @@ public final class plasmaSearchEvent {
final Iterator<Map.Entry<String, indexContainer>> ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator();
Map.Entry<String, indexContainer> entry;
int maxcount = -1;
double mindhtdistance = 1.1, d;
long mindhtdistance = Long.MAX_VALUE, l;
String wordhash;
while (ci.hasNext()) {
entry = ci.next();
@ -185,10 +184,10 @@ public final class plasmaSearchEvent {
IAmaxcounthash = wordhash;
maxcount = container.size();
}
d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, wordhash);
if (d < mindhtdistance) {
l = yacySeed.dhtDistance(wordhash, wordIndex.seedDB.mySeed());
if (l < mindhtdistance) {
// calculate the word hash that is closest to our dht position
mindhtdistance = d;
mindhtdistance = l;
IAneardhthash = wordhash;
}
IACount.put(wordhash, Integer.valueOf(container.size()));

View File

@ -2005,8 +2005,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
try {
// find a list of DHT-peers
final double maxDist = 0.3;
final ArrayList<yacySeed> seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, Math.min(8, (int) (this.webIndex.seedDB.sizeConnected() * maxDist)), dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash(), maxDist);
final ArrayList<yacySeed> seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, 9, dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash());
if (seeds.size() < peerCount) {
log.logWarning("found not enough (" + seeds.size() + ") peers for distribution for dhtchunk [" + dhtChunk.firstContainer().getWordHash() + " .. " + dhtChunk.lastContainer().getWordHash() + "]");
return false;

View File

@ -479,7 +479,7 @@ public final class yacyClient {
try {
result = nxTools.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000), "UTF-8");
} catch (final IOException e) {
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(target.hash, wordhashes.substring(0, 12)));
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacySeed.dhtDistance(wordhashes.substring(0, 12), target));
//yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
return null;
}
@ -492,8 +492,7 @@ public final class yacyClient {
+ " (zero response), score="
+ target.selectscore
+ ", DHTdist="
+ yacyDHTAction.dhtDistance(target.hash, wordhashes
.substring(0, 12)));
+ yacySeed.dhtDistance(wordhashes.substring(0, 12), target));
return null;
}
@ -653,9 +652,7 @@ public final class yacyClient {
+ ", score="
+ target.selectscore
+ ", DHTdist="
+ ((wordhashes.length() < 12) ? "void" : Double
.toString(yacyDHTAction.dhtDistance(target.hash,
wordhashes.substring(0, 12))))
+ ((wordhashes.length() < 12) ? "void" : yacySeed.dhtDistance(wordhashes.substring(0, 12), target))
+ ", searchtime=" + searchtime + ", netdelay="
+ (totalrequesttime - searchtime) + ", references="
+ result.get("references"));

View File

@ -302,35 +302,13 @@ public class yacyDHTAction {
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash) {
if (seedDB == null) return false;
if (seedDB.mySeed().isPotential()) return false;
final double distance = dhtDistance(seedDB.mySeed().hash, wordhash);
final double max = 1.2 / seedDB.sizeConnected();
final long distance = yacySeed.dhtDistance(wordhash, seedDB.mySeed());
final long max = Long.MAX_VALUE / seedDB.sizeConnected() * 2;
//System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max);
return (distance > 0) && (distance <= max);
}
public static double dhtDistance(final String peer, final String word) {
// the dht distance is a positive value between 0 and 1
// if the distance is small, the word more probably belongs to the peer
final double d = hashDistance(peer, word);
if (d > 0) {
return d; // case where the word is 'before' the peer
}
return 1 + d; // wrap-around case
}
private static double hashDistance(final String from, final String to) {
// computes the distance between two hashes.
// the maximum distance between two hashes is 1, the minimum -1
// this can be used like "from - to"
// the result is positive if from > to
assert (from != null);
assert (to != null);
assert (from.length() == 12) : "from.length = " + from.length() + ", from = " + from;
assert (to.length() == 12) : "to.length = " + to.length() + ", to = " + to;
return ((double) (kelondroBase64Order.enhancedCoder.cardinal(from.getBytes()) - kelondroBase64Order.enhancedCoder.cardinal(to.getBytes()))) / ((double) Long.MAX_VALUE);
}
public synchronized ArrayList<yacySeed> getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey, final double maxDist) {
public synchronized ArrayList<yacySeed> getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey) {
// find a list of DHT-peers
assert firstKey != null;
assert lastKey != null;
@ -347,7 +325,7 @@ public class yacyDHTAction {
//double ownDistance = Math.min(yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, firstKey), yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, lastKey));
//double maxDistance = Math.min(ownDistance, maxDist);
double firstdist, lastdist;
long firstdist, lastdist;
Iterator<yacySeed> e = this.getAcceptRemoteIndexSeeds(lastKey);
final TreeSet<String> doublecheck = new TreeSet<String>(kelondroBase64Order.enhancedComparator);
int maxloop = Math.min(100, seedDB.sizeConnected()); // to ensure termination
@ -355,11 +333,9 @@ public class yacyDHTAction {
while ((e.hasNext()) && (seeds.size() < (primaryPeerCount + reservePeerCount)) && (maxloop-- > 0)) {
seed = e.next();
if (seed == null || seed.hash == null) continue;
firstdist = yacyDHTAction.dhtDistance(seed.hash, firstKey);
lastdist = yacyDHTAction.dhtDistance(seed.hash, lastKey);
if (lastdist > maxDist) {
if (log != null && yacyCore.log.isFine()) log.logFine("Discarded too distant DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);
} else if (doublecheck.contains(seed.hash)) {
firstdist = yacySeed.dhtDistance(firstKey, seed);
lastdist = yacySeed.dhtDistance(lastKey, seed);
if (doublecheck.contains(seed.hash)) {
if (log != null && yacyCore.log.isFine()) log.logFine("Discarded double DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);
} else {
if (log != null) log.logInfo("Selected " + ((seeds.size() < primaryPeerCount) ? "primary" : "reserve") + " DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist);

View File

@ -187,7 +187,7 @@ public class yacySearch extends Thread {
Iterator<yacySeed> dhtEnum;
int c;
String wordhash;
double distance;
long distance;
Iterator<String> iter = wordhashes.iterator();
while (iter.hasNext()) {
wordhash = iter.next();
@ -196,8 +196,7 @@ public class yacySearch extends Thread {
while (dhtEnum.hasNext() && c > 0) {
seed = dhtEnum.next();
if (seed == null || seed.hash == null) continue;
distance = yacyDHTAction.dhtDistance(seed.hash, wordhash);
if (distance > 0.2) continue; // catch bug in peer selection
distance = yacySeed.dhtDistance(wordhash, seed);
if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
if (serverLog.isFine("PLASMA")) serverLog.logFine("PLASMA", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/" + distance + " for wordhash " + wordhash + ", score " + c);
ranking.addScore(seed.hash, c--);

View File

@ -668,27 +668,122 @@ public class yacySeed implements Cloneable {
return type.equals(yacySeed.PEERTYPE_SENIOR) || type.equals(yacySeed.PEERTYPE_PRINCIPAL);
}
public static final long minDHTNumber = kelondroBase64Order.enhancedCoder.cardinal(kelondroBase64Order.zero(12));
public static final long maxDHTDistance = Long.MAX_VALUE;
public double dhtPosition() {
// normalized to 1.0
/**
 * The DHT position of this seed, computed from its own hash.
 * @return the position as long value, see {@link #dhtPosition(String)}
 */
public final long dhtPosition() {
    final String ownHash = this.hash;
    return dhtPosition(ownHash);
}
public static double dhtPosition(final String ahash) {
private final static double dhtPositionDouble(final String wordHash) {
// normalized to 1.0
return ((double) kelondroBase64Order.enhancedCoder.cardinal(ahash.getBytes())) / ((double) maxDHTDistance);
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
assert c != Long.MAX_VALUE;
if (c == Long.MAX_VALUE) return 0.999999999999;
return ((double) c) / ((double) Long.MAX_VALUE);
}
public final static double dhtDistance(final String from, final String to) {
// computes a virtual distance, the result must be set in relation to maxDHTDistace
// if the distance is small, this peer is more responsible for that word hash
// if the distance is big, this peer is less responsible for that word hash
if (from == null) return dhtPosition(to);
final double fromPos = dhtPosition(from);
final double toPos = dhtPosition(to);
return (fromPos <= toPos) ? (toPos - fromPos) : (1.0 - fromPos + toPos);
/**
 * Computes the DHT position of a hash as a long value.
 * The position is the cardinal of the hash bytes as computed by the enhanced coder.
 * A cardinal equal to Long.MAX_VALUE is not expected (asserted); if it occurs anyway
 * it is reported as Long.MAX_VALUE - 1, so the result is always below Long.MAX_VALUE.
 * @param wordHash the hash whose position is wanted
 * @return the DHT position
 */
public final static long dhtPosition(final String wordHash) {
    final long cardinal = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
    assert cardinal != Long.MAX_VALUE;
    return (cardinal == Long.MAX_VALUE) ? Long.MAX_VALUE - 1 : cardinal;
}
/**
 * Calculate the DHT position for horizontal and vertical performance scaling:
 * horizontal: scale with the number of words
 * vertical: scale with the number of references for every word
 * The vertical scaling is selected using the corresponding reference hash, the url hash.
 * This has the effect that every vertical position accumulates references for the same url
 * and the urls are not spread over all positions of the DHT. To use this effect, the
 * horizontal DHT position must be normalized to a 'rest' value of a partition size.
 * This method is compatible with the classic DHT computation, as always one of the vertical
 * DHT positions corresponds to the classic position.
 * @param wordHash the hash of the RWI
 * @param urlHash the hash of a reference
 * @param e the partition exponent; the number of partitions is 2**e
 * @return a double in the range 0 .. 1.0 (including 0, excluding 1.0), the DHT position
 */
private final static double dhtPositionDouble(final String wordHash, final String urlHash, final int e) {
assert wordHash != null;
assert urlHash != null;
// fallback for runs without assertions: without a url hash only the horizontal position is computed
if (urlHash == null) return dhtPositionDouble(wordHash);
// calculate the primary DHT position:
// this is done first using the 'classic' dht position and then
// calculating an alternative 'first' position considering the partition size
// because of the number of partitions, the 'original' position is reached as one of the
// alternative dht positions within the partitions
double primary = dhtPositionDouble(wordHash); // the hash position for horizontal performance scaling
// the number of partitions is 2 ** e, the partitions may grow exponentially (every time it is doubled)
double partitions = (double) (1L << e);
// modification of the primary position using the partitions to create a normalization:
// this is the start of the partition that contains the primary position
double normalization = Math.floor(primary * partitions) / partitions;
// calculate the shift: the alternative position for vertical performance scaling
// this is the start of the partition that contains the position of the url hash
double shift = Math.floor(dhtPositionDouble(urlHash) * partitions) / partitions;
// the final position is the primary, normalized position plus the shift
double p = primary - normalization + shift;
// one of the possible shift positions points to the original dht position:
// this is where the shift is equal to the normalization, when
// Math.floor(dhtPosition(wordHash) * partitions) == Math.floor(dhtPosition(urlHash) * partitions)
assert p < 1.0 : "p = " + p; // because of the normalization never an overflow should occur
assert p >= 0.0 : "p = " + p;
// wrap-around as a safety net in case the assertion above is disabled
return (p < 1.0) ? p : p - 1.0;
}
/**
 * Computes the combined horizontal and vertical DHT position as a long value.
 * The position space is cut into 2**e partitions of size 2**(63 - e).
 * The bits below the partition size are taken from the position of the word hash,
 * all bits above are taken from the position of the url hash.
 * If no url hash is given or e is smaller than 1, the plain position of the word hash is returned.
 * @param wordHash the hash of the word
 * @param urlHash the hash of a reference
 * @param e the partition exponent; the number of partitions is 2**e
 * @return the DHT position
 */
public final static long dhtPosition(final String wordHash, final String urlHash, final int e) {
    assert wordHash != null;
    assert urlHash != null;
    final boolean verticalUsable = (urlHash != null) && (e >= 1);
    if (!verticalUsable) return dhtPosition(wordHash);
    // the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
    assert e > 0;
    final long withinPartition = (1L << (Long.SIZE - 1 - e)) - 1L;
    final long horizontal = dhtPosition(wordHash) & withinPartition;
    final long vertical = dhtPosition(urlHash) & ~withinPartition;
    return horizontal | vertical;
}
/**
 * Compute all vertical DHT positions for a given word, as doubles.
 * @param wordHash the hash of the word
 * @param e the partition exponent; the number of partitions of the DHT is 2**e
 * @return an array of 2**e double values, the possible DHT positions,
 *         in ascending order and one partition size (1.0 / 2**e) apart
 */
private final static double[] dhtPositionsDouble(final String wordHash, final int e) {
assert wordHash != null;
int partitions = 1 << e;
double[] d = new double[partitions];
double primary = dhtPositionDouble(wordHash);
double partitionSize = 1.0 / (double) partitions;
// first position: the offset of the primary position inside its own partition
d[0] = primary - Math.floor(primary * partitions) / partitions;
// every further position has the same offset in the next partition
for (int i = 1; i < partitions; i++) {
d[i] = d[i - 1] + partitionSize;
}
return d;
}
/**
 * Compute all vertical DHT positions for a given word, as long values.
 * The first position is the position of the word hash reduced to its offset inside
 * a partition of size 2**(63 - e); every further position is one partition size higher.
 * @param wordHash the hash of the word
 * @param e the partition exponent; 2**e positions are computed
 * @return an array of 2**e long values, the possible DHT positions
 */
public final static long[] dhtPositions(final String wordHash, final int e) {
    assert wordHash != null;
    final int count = 1 << e;
    final long[] positions = new long[count];
    final long step = 1L << (Long.SIZE - 1 - e);
    final long offset = dhtPosition(wordHash) & (step - 1L);
    positions[0] = offset;
    int i = 1;
    while (i < count) {
        positions[i] = offset + i * step;
        i++;
    }
    return positions;
}
/**
 * Computes the DHT distance from a word hash to the hash of a peer.
 * @param word the word hash
 * @param peer the peer whose hash is the target of the distance
 * @return the distance as computed by the hash-to-hash distance function
 */
public final static long dhtDistance(final String word, final yacySeed peer) {
    final String peerHash = peer.hash;
    return dhtDistance(word, peerHash);
}
/**
 * Computes the DHT distance between two hashes as a non-negative long value.
 * The distance is the position of 'to' minus the position of 'from'; a negative
 * difference (wrap-around case) is lifted into the positive range by adding 2**63.
 * If the distance is small, the word more probably belongs to the peer.
 * @param from the hash where the distance starts
 * @param to the hash where the distance ends
 * @return the distance, a value from 0 up to Long.MAX_VALUE
 */
private final static long dhtDistance(final String from, final String to) {
    assert to != null;
    assert from != null;
    final long target = dhtPosition(to);
    final long origin = dhtPosition(from);
    final long delta = target - origin;
    if (delta >= 0) {
        return delta;
    }
    // wrap-around case
    return (delta + Long.MAX_VALUE) + 1;
}
private static String bestGap(final yacySeedDB seedDB) {
@ -697,7 +792,7 @@ public class yacySeed implements Cloneable {
return randomHash();
}
// find gaps
final TreeMap<Double, String> gaps = hashGaps(seedDB);
final TreeMap<Long, String> gaps = hashGaps(seedDB);
// take one gap; prefer biggest but take also another smaller by chance
String interval = null;
@ -709,19 +804,18 @@ public class yacySeed implements Cloneable {
if (interval == null) return randomHash();
// find dht position and size of gap
final double gapsize = dhtDistance(interval.substring(0, 12), interval.substring(12));
assert gapsize >= 0.0;
double gappos = dhtPosition(interval.substring(0, 12)) + (gapsize / 2);
if (gappos >= 1.0) gappos = gappos - 1.0; // fix overflow; can only occur for gap at end
final long gaphalf = dhtDistance(interval.substring(0, 12), interval.substring(12)) >> 1;
long p = dhtPosition(interval.substring(0, 12));
long gappos = (Long.MAX_VALUE - p >= gaphalf) ? p + gaphalf : (p - Long.MAX_VALUE) + gaphalf;
return positionToHash(gappos);
}
private static TreeMap<Double, String> hashGaps(final yacySeedDB seedDB) {
final TreeMap<Double, String>gaps = new TreeMap<Double, String>();
private static TreeMap<Long, String> hashGaps(final yacySeedDB seedDB) {
final TreeMap<Long, String>gaps = new TreeMap<Long, String>();
if (seedDB == null) return gaps;
final Iterator<yacySeed> i = seedDB.seedsConnected(true, false, null, (float) 0.0);
double d;
long l;
yacySeed s0 = null, s1, first = null;
while (i.hasNext()) {
s1 = i.next();
@ -730,19 +824,14 @@ public class yacySeed implements Cloneable {
first = s0;
continue;
}
if (s0.hash.equals("fF99P8dMio7M")) {
System.out.print(0);
}
d = dhtDistance(s0.hash, s1.hash);
assert d >= 0.0;
gaps.put(d, s0.hash + s1.hash);
l = dhtDistance(s0.hash, s1.hash);
gaps.put(l, s0.hash + s1.hash);
s0 = s1;
}
// compute also the last gap
if ((first != null) && (s0 != null)) {
d = dhtDistance(s0.hash, first.hash);
assert d >= 0.0;
gaps.put(d, s0.hash + first.hash);
l = dhtDistance(s0.hash, first.hash);
gaps.put(l, s0.hash + first.hash);
}
return gaps;
}
@ -752,19 +841,13 @@ public class yacySeed implements Cloneable {
assert t >= 0.0 : "t = " + t;
assert t < 1.0 : "t = " + t;
// now calculate a hash that is closest to the best position
double d, bestD = Double.MAX_VALUE;
final int tries = 128;
String hash, bestHash = null;
for (int v = 0; v < tries; v++) {
hash = randomHash();
d = dhtPosition(hash);
if (Math.abs(d - t) < bestD) {
bestD = Math.abs(d - t);
bestHash = hash;
}
}
return bestHash;
return new String(kelondroBase64Order.enhancedCoder.uncardinal((long) (((double) Long.MAX_VALUE) * t))) + "AA";
}
/**
 * Transforms a DHT position into a peer hash that lies close to that position.
 * The position is converted to bytes by the enhanced coder and the result is
 * extended with the constant suffix "AA".
 * @param l the DHT position
 * @return the hash string
 */
private static String positionToHash(final long l) {
    final byte[] encoded = kelondroBase64Order.enhancedCoder.uncardinal(l);
    final String prefix = new String(encoded);
    return prefix + "AA";
}
public static yacySeed genLocalSeed(final yacySeedDB db) {
@ -923,4 +1006,46 @@ public class yacySeed implements Cloneable {
}
}
/**
 * Command-line test of the DHT position calculation.
 * With one argument (a word hash) only the horizontal position is computed;
 * with three arguments (word hash, url hash, partition exponent) the combined
 * horizontal and vertical position is computed. The positions are printed in their
 * double and long form, converted into each other and converted back into hashes.
 * Without any argument a usage line is printed and nothing is computed.
 * @param args word hash, optionally followed by url hash and partition exponent
 */
public static void main(String[] args) {
    // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFn76
    // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3
    if (args == null || args.length == 0) {
        // without this check the access to args[0] fails with an ArrayIndexOutOfBoundsException
        System.out.println("usage: yacySeed <wordHash> [<urlHash> <partitionExponent>]");
        return;
    }
    // test the DHT position calculation
    String wordHash = args[0];
    double dhtd;
    long dhtl;
    int partitionExponent = 0;
    if (args.length == 3) {
        // the horizontal and vertical position calculation
        String urlHash = args[1];
        partitionExponent = Integer.parseInt(args[2]);
        dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
        dhtl = dhtPosition(wordHash, urlHash, partitionExponent);
    } else {
        // only a horizontal position calculation
        dhtd = dhtPositionDouble(wordHash);
        dhtl = dhtPosition(wordHash);
    }
    System.out.println("DHT Double              = " + dhtd);
    System.out.println("DHT Long                = " + dhtl);
    System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE));
    System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
    System.out.println("DHT as b64 from Double  = " + positionToHash(dhtd));
    System.out.println("DHT as b64 from Long    = " + positionToHash(dhtl));
    // list every vertical position of the word, once per number representation
    System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: ");
    double[] d = dhtPositionsDouble(wordHash, partitionExponent);
    for (int i = 0; i < d.length; i++) {
        if (i > 0) System.out.print(", ");
        System.out.print(positionToHash(d[i]));
    }
    System.out.println();
    System.out.print("all " + (1 << partitionExponent) + " DHT positions from long   : ");
    long[] l = dhtPositions(wordHash, partitionExponent);
    for (int i = 0; i < l.length; i++) {
        if (i > 0) System.out.print(", ");
        System.out.print(positionToHash(l[i]));
    }
    System.out.println();
}
}