diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index a092b064c..f0ad7ae1e 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -43,7 +43,6 @@ import de.anomic.tools.nxTools; import de.anomic.xml.RSSFeed; import de.anomic.xml.RSSMessage; import de.anomic.yacy.yacyCore; -import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacyNetwork; import de.anomic.yacy.yacySeed; @@ -193,7 +192,7 @@ public final class transferRWI { if ((wordhashes.length == 0) || (received == 0)) { sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs, blocked " + blocked + " RWIs"); } else { - final double avdist = (yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[0]) + yacyDHTAction.dhtDistance(sb.webIndex.seedDB.mySeed().hash, wordhashes[received - 1])) / 2.0; + final long avdist = (yacySeed.dhtDistance(wordhashes[0], sb.webIndex.seedDB.mySeed()) + yacySeed.dhtDistance(wordhashes[received - 1], sb.webIndex.seedDB.mySeed())) / 2; sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs"); RSSFeed.channels(RSSFeed.INDEXRECEIVE).addMessage(new RSSMessage("Received " + received + " RWIs [" + wordhashes[0] + " .. 
" + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", requesting " + unknownURL.size() + " URLs, blocked " + blocked, "", "")); } diff --git a/source/de/anomic/kelondro/kelondroBase64Order.java b/source/de/anomic/kelondro/kelondroBase64Order.java index 7859cc803..2356d1b6f 100644 --- a/source/de/anomic/kelondro/kelondroBase64Order.java +++ b/source/de/anomic/kelondro/kelondroBase64Order.java @@ -263,6 +263,16 @@ public class kelondroBase64Order extends kelondroAbstractOrder implement return c; } + public final byte[] uncardinal(long c) { + c = c >> 3; + byte[] b = new byte[10]; + for (int p = 9; p >= 0; p--) { + b[p] = (byte) alpha[(int) (c & 0x3fL)]; + c = c >> 6; + } + return b; + } + public final long cardinal(final byte[] key) { if (this.zero == null) return cardinalI(key); final long zeroCardinal = cardinalI(this.zero); @@ -373,9 +383,10 @@ public class kelondroBase64Order extends kelondroAbstractOrder implement } public static void main(final String[] s) { + // java -classpath classes de.anomic.kelondro.kelondroBase64Order final kelondroBase64Order b64 = new kelondroBase64Order(true, true); if (s.length == 0) { - System.out.println("usage: -[ec|dc|es|ds|s2m] "); + System.out.println("usage: -[ec|dc|es|ds|clcn] "); System.exit(0); } if (s[0].equals("-ec")) { @@ -394,5 +405,13 @@ public class kelondroBase64Order extends kelondroAbstractOrder implement // generate a b64 decoding from a given string System.out.println(b64.decodeString(s[1], "")); } + if (s[0].equals("-cl")) { + // return the cardinal of a given string as long value with the enhanced encoder + System.out.println(kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes())); + } + if (s[0].equals("-cn")) { + // return the cardinal of a given string as normalized float 0 .. 
1 with the enhanced encoder + System.out.println(((double) kelondroBase64Order.enhancedCoder.cardinal(s[1].getBytes())) / ((double) Long.MAX_VALUE)); + } } } diff --git a/source/de/anomic/plasma/plasmaDHTChunk.java b/source/de/anomic/plasma/plasmaDHTChunk.java index a7883f854..79ff39f87 100644 --- a/source/de/anomic/plasma/plasmaDHTChunk.java +++ b/source/de/anomic/plasma/plasmaDHTChunk.java @@ -39,7 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; import de.anomic.server.serverCodings; import de.anomic.server.logging.serverLog; -import de.anomic.yacy.yacyDHTAction; +import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; public class plasmaDHTChunk { @@ -112,7 +112,7 @@ public class plasmaDHTChunk { this.log = log; this.wordIndex = wordIndex; this.startPointHash = selectTransferStart(); - if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash)); + if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed())); selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime); // count the indexes, can be smaller as expected @@ -130,7 +130,7 @@ public class plasmaDHTChunk { try { this.log = log; this.wordIndex = wordIndex; - if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, this.startPointHash)); + if (this.log.isFine()) log.logFine("Demanded hash " + startHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed())); selectTransferContainers(startHash, minCount, maxCount, maxtime); // count the indexes, can 
be smaller as expected @@ -145,27 +145,7 @@ public class plasmaDHTChunk { } private String selectTransferStart() { - String startPointHash; - // first try to select with increasing probality a good start point - final double minimumDistance = ((double) peerRedundancy) / ((double) wordIndex.seedDB.sizeConnected()); - double d, bestDistance = 0.0; - String bestHash = null; - for (int i = wordIndex.seedDB.sizeConnected() / 8; i > 0; i--) { - startPointHash = kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(i + System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength); - d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, startPointHash); - if (d > (minimumDistance + ((double) i / (double) 10))) { - return startPointHash; - } - if (d > bestDistance) { - bestDistance = d; - bestHash = startPointHash; - } - } - // if that fails, take simply the best start point - if (bestHash == null) { - return wordIndex.seedDB.mySeed().hash.substring(0, 11) + "z"; - } - return bestHash; + return kelondroBase64Order.enhancedCoder.encode(serverCodings.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength); } private void selectTransferContainers(final String hash, final int mincount, final int maxcount, final int maxtime) throws InterruptedException { @@ -198,7 +178,7 @@ public class plasmaDHTChunk { int wholesize; urlCache = new HashMap(); - final double maximumDistance = ((double) peerRedundancy * 2) / (wordIndex.seedDB.sizeConnected()); + final long maximumDistanceLong = Long.MAX_VALUE / wordIndex.seedDB.sizeConnected() * peerRedundancy * 2; final long timeout = (maxtime < 0) ? 
Long.MAX_VALUE : System.currentTimeMillis() + maxtime; while ( (maxcount > refcount) && @@ -206,7 +186,7 @@ public class plasmaDHTChunk { ((container = indexContainerIterator.next()) != null) && (container.size() > 0) && ((tmpContainers.size() == 0) || - (yacyDHTAction.dhtDistance(container.getWordHash(), tmpContainers.get(0).getWordHash()) < maximumDistance)) && + (Math.abs(yacySeed.dhtPosition(container.getWordHash()) - yacySeed.dhtPosition(tmpContainers.get(0).getWordHash())) < maximumDistanceLong)) && (System.currentTimeMillis() < timeout) ) { // check for interruption diff --git a/source/de/anomic/plasma/plasmaGrafics.java b/source/de/anomic/plasma/plasmaGrafics.java index d1bd1c0c6..c47dd69b1 100644 --- a/source/de/anomic/plasma/plasmaGrafics.java +++ b/source/de/anomic/plasma/plasmaGrafics.java @@ -123,14 +123,12 @@ public class plasmaGrafics { final int cx = eventPicture.getWidth() / 2; final int cy = eventPicture.getHeight() / 2; - String hash; int angle; // draw in the primary search peers for (int j = 0; j < primarySearches.length; j++) { eventPicture.setColor((primarySearches[j].isAlive()) ? ymageMatrix.RED : ymageMatrix.GREEN); - hash = primarySearches[j].target().hash; - angle = (int) (360 * yacySeed.dhtPosition(hash)); + angle = (int) (360.0 * (((double) primarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE))); eventPicture.arcLine(cx, cy, cr - 20, cr, angle); } @@ -138,8 +136,7 @@ public class plasmaGrafics { if (secondarySearches != null) { for (int j = 0; j < secondarySearches.length; j++) { eventPicture.setColor((secondarySearches[j].isAlive()) ? 
ymageMatrix.RED : ymageMatrix.GREEN); - hash = secondarySearches[j].target().hash; - angle = (int) (360 * yacySeed.dhtPosition(hash)); + angle = (int) (360.0 * (((double) secondarySearches[j].target().dhtPosition()) / ((double) Long.MAX_VALUE))); eventPicture.arcLine(cx, cy, cr - 10, cr, angle - 1); eventPicture.arcLine(cx, cy, cr - 10, cr, angle + 1); } @@ -150,8 +147,7 @@ public class plasmaGrafics { final Iterator i = query.queryHashes.iterator(); eventPicture.setColor(ymageMatrix.GREY); while (i.hasNext()) { - hash = i.next(); - angle = (int) (360 * yacySeed.dhtPosition(hash)); + angle = (int) (360.0 * (((double) yacySeed.dhtPosition(i.next())) / ((double) Long.MAX_VALUE))); eventPicture.arcLine(cx, cy, cr - 20, cr, angle); } @@ -249,7 +245,7 @@ public class plasmaGrafics { final String name = seed.getName().toUpperCase() /*+ ":" + seed.hash + ":" + (((double) ((int) (100 * (((double) yacySeed.dhtPosition(seed.hash)) / ((double) yacySeed.maxDHTDistance))))) / 100.0)*/; if (name.length() < shortestName) shortestName = name.length(); if (name.length() > longestName) longestName = name.length(); - final int angle = (int) (360 * seed.dhtPosition()); + final int angle = (int) (360.0 * (((double) seed.dhtPosition()) / ((double) Long.MAX_VALUE))); //System.out.println("Seed " + seed.hash + " has distance " + seed.dhtDistance() + ", angle = " + angle); int linelength = 20 + outerradius * (20 * (name.length() - shortestName) / (longestName - shortestName) + Math.abs(seed.hash.hashCode() % 20)) / 60; if (linelength > outerradius) linelength = outerradius; diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 14b8d17de..6e6e1ed33 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -49,7 +49,6 @@ import de.anomic.kelondro.kelondroSortStore; import de.anomic.plasma.plasmaSnippetCache.MediaSnippet; import de.anomic.server.serverProfiling; import 
de.anomic.server.logging.serverLog; -import de.anomic.yacy.yacyDHTAction; import de.anomic.yacy.yacySearch; import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacyURL; @@ -174,7 +173,7 @@ public final class plasmaSearchEvent { final Iterator> ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator(); Map.Entry entry; int maxcount = -1; - double mindhtdistance = 1.1, d; + long mindhtdistance = Long.MAX_VALUE, l; String wordhash; while (ci.hasNext()) { entry = ci.next(); @@ -185,10 +184,10 @@ public final class plasmaSearchEvent { IAmaxcounthash = wordhash; maxcount = container.size(); } - d = yacyDHTAction.dhtDistance(wordIndex.seedDB.mySeed().hash, wordhash); - if (d < mindhtdistance) { + l = yacySeed.dhtDistance(wordhash, wordIndex.seedDB.mySeed()); + if (l < mindhtdistance) { // calculate the word hash that is closest to our dht position - mindhtdistance = d; + mindhtdistance = l; IAneardhthash = wordhash; } IACount.put(wordhash, Integer.valueOf(container.size())); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 0b1bb7424..e35bbd8e7 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -2005,8 +2005,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, Math.min(8, (int) (this.webIndex.seedDB.sizeConnected() * maxDist)), dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash(), maxDist); + final ArrayList seeds = webIndex.peerActions.dhtAction.getDHTTargets(webIndex.seedDB, log, peerCount, 9, dhtChunk.firstContainer().getWordHash(), dhtChunk.lastContainer().getWordHash()); if (seeds.size() < peerCount) { log.logWarning("found not enough (" + seeds.size() + ") peers for distribution for dhtchunk [" + dhtChunk.firstContainer().getWordHash() + " .. 
" + dhtChunk.lastContainer().getWordHash() + "]"); return false; diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 486e8cf3b..970e6ea0d 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -479,7 +479,7 @@ public final class yacyClient { try { result = nxTools.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000), "UTF-8"); } catch (final IOException e) { - yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(target.hash, wordhashes.substring(0, 12))); + yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacySeed.dhtDistance(wordhashes.substring(0, 12), target)); //yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage()); return null; } @@ -492,8 +492,7 @@ public final class yacyClient { + " (zero response), score=" + target.selectscore + ", DHTdist=" - + yacyDHTAction.dhtDistance(target.hash, wordhashes - .substring(0, 12))); + + yacySeed.dhtDistance(wordhashes.substring(0, 12), target)); return null; } @@ -653,9 +652,7 @@ public final class yacyClient { + ", score=" + target.selectscore + ", DHTdist=" - + ((wordhashes.length() < 12) ? "void" : Double - .toString(yacyDHTAction.dhtDistance(target.hash, - wordhashes.substring(0, 12)))) + + ((wordhashes.length() < 12) ? 
"void" : yacySeed.dhtDistance(wordhashes.substring(0, 12), target)) + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references")); diff --git a/source/de/anomic/yacy/yacyDHTAction.java b/source/de/anomic/yacy/yacyDHTAction.java index 3a9893287..ebcfe0b90 100644 --- a/source/de/anomic/yacy/yacyDHTAction.java +++ b/source/de/anomic/yacy/yacyDHTAction.java @@ -302,35 +302,13 @@ public class yacyDHTAction { public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash) { if (seedDB == null) return false; if (seedDB.mySeed().isPotential()) return false; - final double distance = dhtDistance(seedDB.mySeed().hash, wordhash); - final double max = 1.2 / seedDB.sizeConnected(); + final long distance = yacySeed.dhtDistance(wordhash, seedDB.mySeed()); + final long max = Long.MAX_VALUE / seedDB.sizeConnected() * 2; //System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max); return (distance > 0) && (distance <= max); } - public static double dhtDistance(final String peer, final String word) { - // the dht distance is a positive value between 0 and 1 - // if the distance is small, the word more probably belongs to the peer - final double d = hashDistance(peer, word); - if (d > 0) { - return d; // case where the word is 'before' the peer - } - return 1 + d; // wrap-around case - } - - private static double hashDistance(final String from, final String to) { - // computes the distance between two hashes. 
- // the maximum distance between two hashes is 1, the minimum -1 - // this can be used like "from - to" - // the result is positive if from > to - assert (from != null); - assert (to != null); - assert (from.length() == 12) : "from.length = " + from.length() + ", from = " + from; - assert (to.length() == 12) : "to.length = " + to.length() + ", to = " + to; - return ((double) (kelondroBase64Order.enhancedCoder.cardinal(from.getBytes()) - kelondroBase64Order.enhancedCoder.cardinal(to.getBytes()))) / ((double) Long.MAX_VALUE); - } - - public synchronized ArrayList getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey, final double maxDist) { + public synchronized ArrayList getDHTTargets(final yacySeedDB seedDB, final serverLog log, final int primaryPeerCount, final int reservePeerCount, final String firstKey, final String lastKey) { // find a list of DHT-peers assert firstKey != null; assert lastKey != null; @@ -347,7 +325,7 @@ public class yacyDHTAction { //double ownDistance = Math.min(yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, firstKey), yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, lastKey)); //double maxDistance = Math.min(ownDistance, maxDist); - double firstdist, lastdist; + long firstdist, lastdist; Iterator e = this.getAcceptRemoteIndexSeeds(lastKey); final TreeSet doublecheck = new TreeSet(kelondroBase64Order.enhancedComparator); int maxloop = Math.min(100, seedDB.sizeConnected()); // to ensure termination @@ -355,11 +333,9 @@ public class yacyDHTAction { while ((e.hasNext()) && (seeds.size() < (primaryPeerCount + reservePeerCount)) && (maxloop-- > 0)) { seed = e.next(); if (seed == null || seed.hash == null) continue; - firstdist = yacyDHTAction.dhtDistance(seed.hash, firstKey); - lastdist = yacyDHTAction.dhtDistance(seed.hash, lastKey); - if (lastdist > maxDist) { - if (log != null && yacyCore.log.isFine()) log.logFine("Discarded 
too distant DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist); - } else if (doublecheck.contains(seed.hash)) { + firstdist = yacySeed.dhtDistance(firstKey, seed); + lastdist = yacySeed.dhtDistance(lastKey, seed); + if (doublecheck.contains(seed.hash)) { if (log != null && yacyCore.log.isFine()) log.logFine("Discarded double DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist); } else { if (log != null) log.logInfo("Selected " + ((seeds.size() < primaryPeerCount) ? "primary" : "reserve") + " DHT target peer " + seed.getName() + ":" + seed.hash + ", distance2first = " + firstdist + ", distance2last = " + lastdist); diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index e722d79c9..8f2bc0a96 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -187,7 +187,7 @@ public class yacySearch extends Thread { Iterator dhtEnum; int c; String wordhash; - double distance; + long distance; Iterator iter = wordhashes.iterator(); while (iter.hasNext()) { wordhash = iter.next(); @@ -196,8 +196,7 @@ public class yacySearch extends Thread { while (dhtEnum.hasNext() && c > 0) { seed = dhtEnum.next(); if (seed == null || seed.hash == null) continue; - distance = yacyDHTAction.dhtDistance(seed.hash, wordhash); - if (distance > 0.2) continue; // catch bug in peer selection + distance = yacySeed.dhtDistance(wordhash, seed); if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer if (serverLog.isFine("PLASMA")) serverLog.logFine("PLASMA", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/" + distance + " for wordhash " + wordhash + ", score " + c); ranking.addScore(seed.hash, c--); diff --git a/source/de/anomic/yacy/yacySeed.java b/source/de/anomic/yacy/yacySeed.java index d68e7b1a9..19d1e62ed 100644 --- 
a/source/de/anomic/yacy/yacySeed.java +++ b/source/de/anomic/yacy/yacySeed.java @@ -668,27 +668,122 @@ public class yacySeed implements Cloneable { return type.equals(yacySeed.PEERTYPE_SENIOR) || type.equals(yacySeed.PEERTYPE_PRINCIPAL); } - public static final long minDHTNumber = kelondroBase64Order.enhancedCoder.cardinal(kelondroBase64Order.zero(12)); - public static final long maxDHTDistance = Long.MAX_VALUE; - - public double dhtPosition() { - // normalized to 1.0 + public final long dhtPosition() { + // normalized to Long.MAX_VALUE return dhtPosition(this.hash); } - public static double dhtPosition(final String ahash) { + private final static double dhtPositionDouble(final String wordHash) { // normalized to 1.0 - return ((double) kelondroBase64Order.enhancedCoder.cardinal(ahash.getBytes())) / ((double) maxDHTDistance); + long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes()); + assert c != Long.MAX_VALUE; + if (c == Long.MAX_VALUE) return 0.999999999999; + return ((double) c) / ((double) Long.MAX_VALUE); } - - public final static double dhtDistance(final String from, final String to) { - // computes a virtual distance, the result must be set in relation to maxDHTDistace - // if the distance is small, this peer is more responsible for that word hash - // if the distance is big, this peer is less responsible for that word hash - if (from == null) return dhtPosition(to); - final double fromPos = dhtPosition(from); - final double toPos = dhtPosition(to); - return (fromPos <= toPos) ? 
(toPos - fromPos) : (1.0 - fromPos + toPos); + + public final static long dhtPosition(final String wordHash) { + // normalized to Long.MAX_VALUE + long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes()); + assert c != Long.MAX_VALUE; + if (c == Long.MAX_VALUE) return Long.MAX_VALUE - 1; + return c; + } + + /** + * calculate the DHT position for horizontal and vertical performance scaling: + * horizontal: scale with number of words + * vertical: scale with number of references for every word + * The vertical scaling is selected using the corresponding reference hash, the url hash + * This has the effect that every vertical position accumulates references for the same url + * and the urls are not spread over all positions of the DHT. To use this effect, the + * horizontal DHT position must be normalized to a 'rest' value of a partition size + * This method is compatible with the classic DHT computation, as one of the vertical + * DHT positions always corresponds to the classic position. + * @param wordHash, the hash of the RWI + * @param urlHash, the hash of a reference + * @param e, the partition exponent; the number of partitions is computed as 2**e + * @return a double in the range 0 .. 
1.0 (including 0, excluding 1.0), the DHT position + */ + private final static double dhtPositionDouble(final String wordHash, final String urlHash, final int e) { + assert wordHash != null; + assert urlHash != null; + if (urlHash == null) return dhtPositionDouble(wordHash); + // calculate the primary DHT position: + // this is done first using the 'classic' dht position and then + // calculation an alternative 'first' position considering the partition size + // because of the number of partitions, the 'original' position is reached as one of the + // alternative dht positions within the partitions + double primary = dhtPositionDouble(wordHash); // the hash position for horizontal performance scaling + // the number of partitions is 2 ** e, the partitions may grow exponentially (every time it is doubled) + double partitions = (double) (1L << e); + // modification of the primary position using the partitions to create a normalization: + double normalization = Math.floor(primary * partitions) / partitions; + // calculate the shift: the alternative position for vertical performance scaling + double shift = Math.floor(dhtPositionDouble(urlHash) * partitions) / partitions; + // the final position is the primary, normalized position plus the shift + double p = primary - normalization + shift; + // one of the possible shift positions points to the original dht position: + // this is where the shift is equal to the normalization, when + // Math.floor(dhtPosition(wordHash) * partitions) == Math.floor(dhtPosition(urlHash) * partitions) + assert p < 1.0 : "p = " + p; // because of the normalization never an overflow should occur + assert p >= 0.0 : "p = " + p; + return (p < 1.0) ? 
p : p - 1.0; + } + + public final static long dhtPosition(final String wordHash, final String urlHash, final int e) { + assert wordHash != null; + assert urlHash != null; + if (urlHash == null || e < 1) return dhtPosition(wordHash); + // the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e) + assert e > 0; + long partitionMask = (1L << (Long.SIZE - 1 - e)) - 1L; + return (dhtPosition(wordHash) & partitionMask) | (dhtPosition(urlHash) & ~partitionMask); + } + + /** + * compute all vertical DHT positions for a given word + * @param wordHash, the hash of the word + * @param e, the partition exponent; the number of partitions of the DHT is 2**e + * @return a vector of double values, the possible DHT positions + */ + private final static double[] dhtPositionsDouble(final String wordHash, final int e) { + assert wordHash != null; + int partitions = 1 << e; + double[] d = new double[partitions]; + double primary = dhtPositionDouble(wordHash); + double partitionSize = 1.0 / (double) partitions; + d[0] = primary - Math.floor(primary * partitions) / partitions; + for (int i = 1; i < partitions; i++) { + d[i] = d[i - 1] + partitionSize; + } + return d; + } + + public final static long[] dhtPositions(final String wordHash, final int e) { + assert wordHash != null; + int partitions = 1 << e; + long[] l = new long[partitions]; + long partitionSize = 1L << (Long.SIZE - 1 - e); + l[0] = dhtPosition(wordHash) & (partitionSize - 1L); + for (int i = 1; i < partitions; i++) { + l[i] = l[i - 1] + partitionSize; + } + return l; + } + + public final static long dhtDistance(final String word, final yacySeed peer) { + return dhtDistance(word, peer.hash); + } + + private final static long dhtDistance(final String from, final String to) { + // the dht distance is a non-negative value between 0 and Long.MAX_VALUE + // if the distance is small, the word more probably belongs to the peer + assert to != null; + assert from != null; + final long toPos = dhtPosition(to); + final long fromPos = dhtPosition(from); + final long d = 
toPos - fromPos; + return (d >= 0) ? d : (d + Long.MAX_VALUE) + 1; } private static String bestGap(final yacySeedDB seedDB) { @@ -697,7 +792,7 @@ public class yacySeed implements Cloneable { return randomHash(); } // find gaps - final TreeMap gaps = hashGaps(seedDB); + final TreeMap gaps = hashGaps(seedDB); // take one gap; prefer biggest but take also another smaller by chance String interval = null; @@ -709,19 +804,18 @@ public class yacySeed implements Cloneable { if (interval == null) return randomHash(); // find dht position and size of gap - final double gapsize = dhtDistance(interval.substring(0, 12), interval.substring(12)); - assert gapsize >= 0.0; - double gappos = dhtPosition(interval.substring(0, 12)) + (gapsize / 2); - if (gappos >= 1.0) gappos = gappos - 1.0; // fix overflow; can only occur for gap at end + final long gaphalf = dhtDistance(interval.substring(0, 12), interval.substring(12)) >> 1; + long p = dhtPosition(interval.substring(0, 12)); + long gappos = (Long.MAX_VALUE - p >= gaphalf) ? 
p + gaphalf : (p - Long.MAX_VALUE) + gaphalf; return positionToHash(gappos); } - private static TreeMap hashGaps(final yacySeedDB seedDB) { - final TreeMapgaps = new TreeMap(); + private static TreeMap hashGaps(final yacySeedDB seedDB) { + final TreeMapgaps = new TreeMap(); if (seedDB == null) return gaps; final Iterator i = seedDB.seedsConnected(true, false, null, (float) 0.0); - double d; + long l; yacySeed s0 = null, s1, first = null; while (i.hasNext()) { s1 = i.next(); @@ -730,19 +824,14 @@ public class yacySeed implements Cloneable { first = s0; continue; } - if (s0.hash.equals("fF99P8dMio7M")) { - System.out.print(0); - } - d = dhtDistance(s0.hash, s1.hash); - assert d >= 0.0; - gaps.put(d, s0.hash + s1.hash); + l = dhtDistance(s0.hash, s1.hash); + gaps.put(l, s0.hash + s1.hash); s0 = s1; } // compute also the last gap if ((first != null) && (s0 != null)) { - d = dhtDistance(s0.hash, first.hash); - assert d >= 0.0; - gaps.put(d, s0.hash + first.hash); + l = dhtDistance(s0.hash, first.hash); + gaps.put(l, s0.hash + first.hash); } return gaps; } @@ -752,19 +841,13 @@ public class yacySeed implements Cloneable { assert t >= 0.0 : "t = " + t; assert t < 1.0 : "t = " + t; - // now calculate a hash that is closest to the best position - double d, bestD = Double.MAX_VALUE; - final int tries = 128; - String hash, bestHash = null; - for (int v = 0; v < tries; v++) { - hash = randomHash(); - d = dhtPosition(hash); - if (Math.abs(d - t) < bestD) { - bestD = Math.abs(d - t); - bestHash = hash; - } - } - return bestHash; + return new String(kelondroBase64Order.enhancedCoder.uncardinal((long) (((double) Long.MAX_VALUE) * t))) + "AA"; + } + + private static String positionToHash(final long l) { + // transform the position of a peer position into a close peer hash + + return new String(kelondroBase64Order.enhancedCoder.uncardinal(l)) + "AA"; } public static yacySeed genLocalSeed(final yacySeedDB db) { @@ -923,4 +1006,46 @@ public class yacySeed implements Cloneable { } } + 
public static void main(String[] args) { + // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFn76 + // java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3 + // test the DHT position calculation + String wordHash = args[0]; + double dhtd; + long dhtl; + int partitionExponent = 0; + if (args.length == 3) { + // the horizontal and vertical position calculation + String urlHash = args[1]; + partitionExponent = Integer.parseInt(args[2]); + dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent); + dhtl = dhtPosition(wordHash, urlHash, partitionExponent); + } else { + // only a horizontal position calculation + dhtd = dhtPositionDouble(wordHash); + dhtl = dhtPosition(wordHash); + } + System.out.println("DHT Double = " + dhtd); + System.out.println("DHT Long = " + dhtl); + System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE)); + System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd)); + System.out.println("DHT as b64 from Double = " + positionToHash(dhtd)); + System.out.println("DHT as b64 from Long = " + positionToHash(dhtl)); + + System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: "); + double[] d = dhtPositionsDouble(wordHash, partitionExponent); + for (int i = 0; i < d.length; i++) { + if (i > 0) System.out.print(", "); + System.out.print(positionToHash(d[i])); + } + System.out.println(); + + System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : "); + long[] l = dhtPositions(wordHash, partitionExponent); + for (int i = 0; i < l.length; i++) { + if (i > 0) System.out.print(", "); + System.out.print(positionToHash(l[i])); + } + System.out.println(); + } }