mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added more methods to control the vertical DHT (not yet active .. )
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5514 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
4ef6b15eb8
commit
419469ac27
|
@ -14,6 +14,7 @@ network.unit.search.time = 4
|
|||
network.unit.dht = true
|
||||
network.unit.dhtredundancy.junior = 1
|
||||
network.unit.dhtredundancy.senior = 3
|
||||
network.unit.dht.partitionExponent = 1
|
||||
network.unit.remotecrawl.speed = 6
|
||||
network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt
|
||||
network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt
|
||||
|
|
|
@ -13,6 +13,7 @@ network.unit.domain = local
|
|||
network.unit.dht = false
|
||||
network.unit.dhtredundancy.junior = 1
|
||||
network.unit.dhtredundancy.senior = 1
|
||||
network.unit.dht.partitionExponent = 0
|
||||
network.unit.remotecrawl.speed = 600
|
||||
|
||||
# each network may use different yacy distributions.
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
# network.unit.description = <any string, just informal; appears in network graphic>
|
||||
# network.unit.domain = 'global'|'local'|'any'
|
||||
# network.unit.dhtredundancy = <integer number, 0 means no DHT enabled>
|
||||
# network.unit.dht.partitionExponent = <integer number, 0 means no DHT partition, 1 is partition in two, 2 is partition in four and so on>
|
||||
# network.unit.bootstrap.seedlist<n> = <an url to a seedlists-file, which is stored by a principal peer>
|
||||
# network.unit.protocol.control = 'uncontrolled'|'moderated'|'controlled'
|
||||
# network.unit.protocol.request.authentication.method = 'salted-magic'
|
||||
|
|
|
@ -10,6 +10,7 @@ network.unit.search.time = 4
|
|||
network.unit.dht = false
|
||||
network.unit.dhtredundancy.junior = 1
|
||||
network.unit.dhtredundancy.senior = 1
|
||||
network.unit.dht.partitionExponent = 0
|
||||
network.unit.remotecrawl.speed = 1
|
||||
|
||||
# each network may use different yacy distributions.
|
||||
|
|
|
@ -39,6 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order;
|
|||
import de.anomic.kelondro.kelondroDigest;
|
||||
import de.anomic.kelondro.kelondroException;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
import de.anomic.yacy.yacyPeerSelection;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import de.anomic.yacy.yacySeedDB;
|
||||
|
||||
|
@ -59,6 +60,7 @@ public class plasmaDHTChunk {
|
|||
private indexContainer[] indexContainers = null;
|
||||
private HashMap<String, indexURLReference> urlCache; // String (url-hash) / plasmaCrawlLURL.Entry
|
||||
private int idxCount;
|
||||
private ArrayList<yacySeed> targets;
|
||||
|
||||
private long selectionStartTime = 0;
|
||||
private long selectionEndTime = 0;
|
||||
|
@ -85,6 +87,10 @@ public class plasmaDHTChunk {
|
|||
return this.idxCount;
|
||||
}
|
||||
|
||||
public ArrayList<yacySeed> targets() {
|
||||
return this.targets;
|
||||
}
|
||||
|
||||
private int indexCounter() {
|
||||
int c = 0;
|
||||
for (int i = 0; i < indexContainers.length; i++) {
|
||||
|
@ -105,17 +111,43 @@ public class plasmaDHTChunk {
|
|||
return this.status;
|
||||
}
|
||||
|
||||
public plasmaDHTChunk(final serverLog log, final plasmaWordIndex wordIndex, final int minCount, final int maxCount, final int maxtime, String startPointHash) {
|
||||
public plasmaDHTChunk(
|
||||
final serverLog log,
|
||||
final plasmaWordIndex wordIndex,
|
||||
final int minContainerCount,
|
||||
final int maxContainerCount,
|
||||
final int maxtime,
|
||||
final String startPointHash) {
|
||||
try {
|
||||
this.log = log;
|
||||
this.wordIndex = wordIndex;
|
||||
this.startPointHash = startPointHash;
|
||||
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
|
||||
selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
|
||||
this.selectionStartTime = System.currentTimeMillis();
|
||||
|
||||
// find target peers for the containers
|
||||
int peerCount = wordIndex.seedDB.netRedundancy * 3 + 1;
|
||||
final Iterator<yacySeed> seedIter = yacyPeerSelection.getAcceptRemoteIndexSeeds(wordIndex.seedDB, this.startPointHash, peerCount, false);
|
||||
this.targets = new ArrayList<yacySeed>();
|
||||
while (seedIter.hasNext() && peerCount-- > 0) this.targets.add(seedIter.next());
|
||||
|
||||
// select the containers:
|
||||
|
||||
// select from RAM
|
||||
final int refcountRAM = selectTransferContainersResource(this.startPointHash, true, maxContainerCount, maxtime);
|
||||
if (refcountRAM >= minContainerCount) {
|
||||
if (this.log.isFine()) log.logFine("DHT selection from RAM: " + refcountRAM + " entries");
|
||||
} else {
|
||||
// select from DB
|
||||
final int refcountFile = selectTransferContainersResource(this.startPointHash, false, maxContainerCount, maxtime);
|
||||
if (this.log.isFine()) log.logFine("DHT selection from FILE: " + refcountFile + " entries, RAM provided only " + refcountRAM + " entries");
|
||||
}
|
||||
|
||||
this.selectionEndTime = System.currentTimeMillis();
|
||||
|
||||
// count the indexes, can be smaller as expected
|
||||
this.idxCount = indexCounter();
|
||||
if (this.idxCount < minCount) {
|
||||
if (this.idxCount < minContainerCount) {
|
||||
if (this.log.isFine()) log.logFine("Too few (" + this.idxCount + ") indexes selected for transfer.");
|
||||
this.status = chunkStatus_FAILED;
|
||||
}
|
||||
|
@ -125,31 +157,21 @@ public class plasmaDHTChunk {
|
|||
}
|
||||
|
||||
public static String selectTransferStart() {
|
||||
// a random start point. It is dangerous to take a computed start point, because it could cause a flooding at specific target peers, because
|
||||
// the community of all peers would use the same method for target computation. It is better to just use a random start point.
|
||||
return kelondroBase64Order.enhancedCoder.encode(kelondroDigest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
|
||||
}
|
||||
|
||||
private void selectTransferContainers(final String hash, final int mincount, final int maxcount, final int maxtime) throws InterruptedException {
|
||||
try {
|
||||
this.selectionStartTime = System.currentTimeMillis();
|
||||
final int refcountRAM = selectTransferContainersResource(hash, true, maxcount, maxtime);
|
||||
if (refcountRAM >= mincount) {
|
||||
if (this.log.isFine()) log.logFine("DHT selection from RAM: " + refcountRAM + " entries");
|
||||
return;
|
||||
}
|
||||
final int refcountFile = selectTransferContainersResource(hash, false, maxcount, maxtime);
|
||||
if (this.log.isFine()) log.logFine("DHT selection from FILE: " + refcountFile + " entries, RAM provided only " + refcountRAM + " entries");
|
||||
return;
|
||||
} finally {
|
||||
this.selectionEndTime = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
||||
private int selectTransferContainersResource(final String hash, final boolean ram, final int maxcount, final int maxtime) throws InterruptedException {
|
||||
private int selectTransferContainersResource(final String hash, final boolean ram, final int maxContainerCount, final int maxtime) throws InterruptedException {
|
||||
// if (maxcount > 500) { maxcount = 500; } // flooding & OOM reduce
|
||||
// the hash is a start hash from where the indexes are picked
|
||||
final ArrayList<indexContainer> tmpContainers = new ArrayList<indexContainer>(maxcount);
|
||||
|
||||
// the peer hash of the first peer is the upper limit for the collection
|
||||
String limitHash = this.targets.get(0).hash;
|
||||
|
||||
final ArrayList<indexContainer> tmpContainers = new ArrayList<indexContainer>(maxContainerCount);
|
||||
try {
|
||||
final Iterator<indexContainer> indexContainerIterator = wordIndex.indexContainerSet(hash, ram, true, maxcount).iterator();
|
||||
final Iterator<indexContainer> indexContainerIterator = wordIndex.indexContainerSet(hash, ram, true, maxContainerCount).iterator();
|
||||
indexContainer container;
|
||||
Iterator<indexRWIRowEntry> urlIter;
|
||||
indexRWIRowEntry iEntry;
|
||||
|
@ -158,15 +180,14 @@ public class plasmaDHTChunk {
|
|||
int wholesize;
|
||||
|
||||
urlCache = new HashMap<String, indexURLReference>();
|
||||
final long maximumDistanceLong = Long.MAX_VALUE / wordIndex.seedDB.sizeConnected() * wordIndex.netRedundancy * 2;
|
||||
final long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
|
||||
while (
|
||||
(maxcount > refcount) &&
|
||||
(maxContainerCount > refcount) &&
|
||||
(indexContainerIterator.hasNext()) &&
|
||||
((container = indexContainerIterator.next()) != null) &&
|
||||
(container.size() > 0) &&
|
||||
((tmpContainers.size() == 0) ||
|
||||
(Math.abs(yacySeed.dhtPosition(container.getWordHash()) - yacySeed.dhtPosition(tmpContainers.get(0).getWordHash())) < maximumDistanceLong)) &&
|
||||
(kelondroBase64Order.enhancedComparator.compare(container.getWordHash(), limitHash) < 0)) &&
|
||||
(System.currentTimeMillis() < timeout)
|
||||
) {
|
||||
// check for interruption
|
||||
|
@ -178,7 +199,7 @@ public class plasmaDHTChunk {
|
|||
wholesize = container.size();
|
||||
urlIter = container.entries();
|
||||
// iterate over indexes to fetch url entries and store them in the urlCache
|
||||
while ((urlIter.hasNext()) && (maxcount > refcount) && (System.currentTimeMillis() < timeout)) {
|
||||
while ((urlIter.hasNext()) && (maxContainerCount > refcount) && (System.currentTimeMillis() < timeout)) {
|
||||
// CPU & IO reduce
|
||||
// try { Thread.sleep(50); } catch (InterruptedException e) { }
|
||||
|
||||
|
|
|
@ -147,8 +147,11 @@ public class plasmaGrafics {
|
|||
final Iterator<String> i = query.queryHashes.iterator();
|
||||
eventPicture.setColor(ymageMatrix.GREY);
|
||||
while (i.hasNext()) {
|
||||
angle = (int) (360.0 * (((double) yacySeed.dhtPosition(i.next())) / ((double) Long.MAX_VALUE)));
|
||||
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
|
||||
long[] positions = yacySeed.dhtPositions(i.next(), seedDB.partitionExponent);
|
||||
for (int j = 0; j < positions.length; j++) {
|
||||
angle = (int) (360.0 * (((double) positions[j]) / ((double) Long.MAX_VALUE)));
|
||||
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
|
||||
}
|
||||
}
|
||||
|
||||
return eventPicture;
|
||||
|
|
|
@ -171,7 +171,6 @@ import de.anomic.yacy.yacyClient;
|
|||
import de.anomic.yacy.yacyCore;
|
||||
import de.anomic.yacy.yacyNewsPool;
|
||||
import de.anomic.yacy.yacyNewsRecord;
|
||||
import de.anomic.yacy.yacyPeerSelection;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import de.anomic.yacy.yacyTray;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
|
@ -308,7 +307,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, "");
|
||||
final boolean useCommons = getConfigBool("index.storeCommons", false);
|
||||
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
|
||||
webIndex = new plasmaWordIndex(networkName, log, indexPrimaryPath, indexSecondaryPath, wordCacheMaxCount, useCommons, redundancy);
|
||||
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
|
||||
webIndex = new plasmaWordIndex(
|
||||
networkName,
|
||||
log,
|
||||
indexPrimaryPath,
|
||||
indexSecondaryPath,
|
||||
wordCacheMaxCount,
|
||||
useCommons,
|
||||
redundancy,
|
||||
paritionExponent);
|
||||
crawlResults = new ResultURLs();
|
||||
|
||||
// start yacy core
|
||||
|
@ -746,7 +754,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
|
||||
final boolean useCommons = getConfigBool("index.storeCommons", false);
|
||||
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
|
||||
this.webIndex = new plasmaWordIndex(getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""), getLog(), indexPrimaryPath, indexSecondaryPath, wordCacheMaxCount, useCommons, redundancy);
|
||||
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
|
||||
this.webIndex = new plasmaWordIndex(
|
||||
getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""),
|
||||
getLog(),
|
||||
indexPrimaryPath,
|
||||
indexSecondaryPath,
|
||||
wordCacheMaxCount,
|
||||
useCommons,
|
||||
redundancy,
|
||||
paritionExponent);
|
||||
// we need a new stacker, because this uses network-specific attributes to sort out urls (local, global)
|
||||
this.crawlStacker = new CrawlStacker(
|
||||
crawlQueues,
|
||||
|
@ -1992,7 +2009,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
try {
|
||||
// find a list of DHT-peers
|
||||
if (log != null) log.logInfo("Collecting DHT target peers for first_hash = " + dhtChunk.firstContainer().getWordHash() + ", last_hash = " + dhtChunk.lastContainer().getWordHash());
|
||||
final Iterator<yacySeed> seedIter = yacyPeerSelection.getAcceptRemoteIndexSeeds(webIndex.seedDB, dhtChunk.lastContainer().getWordHash(), peerCount + 9, false);
|
||||
final Iterator<yacySeed> seedIter = dhtChunk.targets().iterator();
|
||||
|
||||
// send away the indexes to all these peers
|
||||
int hc1 = 0;
|
||||
|
|
|
@ -113,15 +113,21 @@ public final class plasmaWordIndex implements indexRI {
|
|||
public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
|
||||
private final File queuesRoot;
|
||||
public yacyPeerActions peerActions;
|
||||
public int netRedundancy;
|
||||
|
||||
public plasmaWordIndex(final String networkName, final serverLog log, final File indexPrimaryRoot, final File indexSecondaryRoot, final int entityCacheMaxSize, boolean useCommons, int redundancy) {
|
||||
public plasmaWordIndex(
|
||||
final String networkName,
|
||||
final serverLog log,
|
||||
final File indexPrimaryRoot,
|
||||
final File indexSecondaryRoot,
|
||||
final int entityCacheMaxSize,
|
||||
final boolean useCommons,
|
||||
final int redundancy,
|
||||
final int partitionExponent) {
|
||||
if (networkName == null || networkName.length() == 0) {
|
||||
log.logSevere("no network name given - shutting down");
|
||||
System.exit(0);
|
||||
}
|
||||
this.log = log;
|
||||
this.netRedundancy = redundancy;
|
||||
this.primaryRoot = new File(indexPrimaryRoot, networkName);
|
||||
this.secondaryRoot = new File(indexSecondaryRoot, networkName);
|
||||
File indexPrimaryTextLocation = new File(this.primaryRoot, "TEXT");
|
||||
|
@ -229,7 +235,9 @@ public final class plasmaWordIndex implements indexRI {
|
|||
new File(networkRoot, "seed.new.heap"),
|
||||
new File(networkRoot, "seed.old.heap"),
|
||||
new File(networkRoot, "seed.pot.heap"),
|
||||
mySeedFile
|
||||
mySeedFile,
|
||||
redundancy,
|
||||
partitionExponent
|
||||
);
|
||||
|
||||
// create or init news database
|
||||
|
|
|
@ -42,9 +42,9 @@ import de.anomic.server.logging.serverLog;
|
|||
|
||||
public class yacyPeerSelection {
|
||||
|
||||
public static void selectDHTPositions(final yacySeedDB seedDB, String wordhash, int redundancy, HashMap<String, yacySeed> regularSeeds, kelondroMScoreCluster<String> ranking) {
|
||||
public static void selectDHTPositions(final yacySeedDB seedDB, String wordhash, int redundancy, int partitionExponent, HashMap<String, yacySeed> regularSeeds, kelondroMScoreCluster<String> ranking) {
|
||||
// this method is called from the search target computation
|
||||
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
|
||||
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, partitionExponent);
|
||||
yacySeed seed;
|
||||
long distance;
|
||||
for (int v = 0; v < dhtVerticalTargets.length; v++) {
|
||||
|
@ -65,16 +65,16 @@ public class yacyPeerSelection {
|
|||
}
|
||||
}
|
||||
|
||||
public static boolean verifyIfOwnWord(final yacySeedDB seedDB, String wordhash, int redundancy) {
|
||||
public static boolean verifyIfOwnWord(final yacySeedDB seedDB, String wordhash, int redundancy, int partitionExponent) {
|
||||
String myHash = seedDB.mySeed().hash;
|
||||
//long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
|
||||
//for (int v = 0; v < dhtVerticalTargets.length; v++) {
|
||||
//wordhash = yacySeed.positionToHash(dhtVerticalTargets[0]);
|
||||
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, partitionExponent);
|
||||
for (int v = 0; v < dhtVerticalTargets.length; v++) {
|
||||
wordhash = yacySeed.positionToHash(dhtVerticalTargets[0]);
|
||||
Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true);
|
||||
while (dhtEnum.hasNext()) {
|
||||
if (dhtEnum.next().hash.equals(myHash)) return true;
|
||||
}
|
||||
//}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,7 @@ public class yacySearch extends Thread {
|
|||
//return (yacySeed[]) l.toArray();
|
||||
}
|
||||
|
||||
private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final Set<String> wordhashes, int seedcount, int redundancy) {
|
||||
private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final Set<String> wordhashes, int seedcount, int redundancy, int partitionExponent) {
|
||||
// find out a specific number of seeds, that would be relevant for the given word hash(es)
|
||||
// the result is ordered by relevance: [0] is most relevant
|
||||
// the seedcount is the maximum number of wanted results
|
||||
|
@ -186,7 +186,7 @@ public class yacySearch extends Thread {
|
|||
Iterator<yacySeed> dhtEnum;
|
||||
Iterator<String> iter = wordhashes.iterator();
|
||||
while (iter.hasNext()) {
|
||||
yacyPeerSelection.selectDHTPositions(seedDB, iter.next(), redundancy, regularSeeds, ranking);
|
||||
yacyPeerSelection.selectDHTPositions(seedDB, iter.next(), redundancy, partitionExponent, regularSeeds, ranking);
|
||||
}
|
||||
|
||||
// put in seeds according to size of peer
|
||||
|
@ -262,7 +262,15 @@ public class yacySearch extends Thread {
|
|||
|
||||
// prepare seed targets and threads
|
||||
assert language != null;
|
||||
final yacySeed[] targetPeers = (clusterselection == null) ? selectSearchTargets(wordIndex.seedDB, plasmaSearchQuery.hashes2Set(wordhashes), targets, wordIndex.netRedundancy) : selectClusterPeers(wordIndex.seedDB, clusterselection);
|
||||
final yacySeed[] targetPeers =
|
||||
(clusterselection == null) ?
|
||||
selectSearchTargets(
|
||||
wordIndex.seedDB,
|
||||
plasmaSearchQuery.hashes2Set(wordhashes),
|
||||
targets,
|
||||
wordIndex.seedDB.netRedundancy,
|
||||
wordIndex.seedDB.partitionExponent)
|
||||
: selectClusterPeers(wordIndex.seedDB, clusterselection);
|
||||
if (targetPeers == null) return new yacySearch[0];
|
||||
targets = targetPeers.length;
|
||||
if (targets == 0) return new yacySearch[0];
|
||||
|
|
|
@ -68,7 +68,6 @@ import de.anomic.tools.crypt;
|
|||
|
||||
public class yacySeed implements Cloneable {
|
||||
|
||||
public static final int partitionExponent = 1;
|
||||
public static final int maxsize = 4096;
|
||||
/**
|
||||
* <b>substance</b> "sI" (send index/words)
|
||||
|
@ -676,20 +675,18 @@ public class yacySeed implements Cloneable {
|
|||
return type.equals(yacySeed.PEERTYPE_SENIOR) || type.equals(yacySeed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated this does not reflect the vertical DHT. A peer may have several positions now.
|
||||
*/
|
||||
public final long dhtPosition() {
|
||||
// normalized to Long.MAX_VALUE
|
||||
return dhtPosition(this.hash);
|
||||
}
|
||||
|
||||
private final static double dhtPositionDouble(final String wordHash) {
|
||||
// normalized to 1.0
|
||||
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
|
||||
assert c != Long.MAX_VALUE;
|
||||
if (c == Long.MAX_VALUE) return 0.999999999999;
|
||||
return ((double) c) / ((double) Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
public final static long dhtPosition(final String wordHash) {
|
||||
/**
|
||||
* @deprecated use dhtPosition(wordHash, urlHash, partitionExponent) instead
|
||||
*/
|
||||
private final static long dhtPosition(final String wordHash) {
|
||||
// normalized to Long.MAX_VALUE
|
||||
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
|
||||
assert c != Long.MAX_VALUE;
|
||||
|
@ -712,67 +709,51 @@ public class yacySeed implements Cloneable {
|
|||
* @param urlHash, the hash of a reference
|
||||
* @return a double in the range 0 .. 1.0 (including 0, excluding 1.0), the DHT position
|
||||
*/
|
||||
private final static double dhtPositionDouble(final String wordHash, final String urlHash, final int e) {
|
||||
assert wordHash != null;
|
||||
assert urlHash != null;
|
||||
if (urlHash == null) return dhtPositionDouble(wordHash);
|
||||
// calculate the primary DHT position:
|
||||
// this is done first using the 'classic' dht position and then
|
||||
// calculation an alternative 'first' position considering the partition size
|
||||
// because of the number of partitions, the 'original' position is reached as one of the
|
||||
// alternative dht positions within the partitions
|
||||
double primary = dhtPositionDouble(wordHash); // the hash position for horizontal performance scaling
|
||||
// the number of partitions is 2 ** e, the partitions may grow exponentially (every time it is doubled)
|
||||
double partitions = (double) (1L << e);
|
||||
// modification of the primary position using the partitions to create a normalization:
|
||||
double normalization = Math.floor(primary * partitions) / partitions;
|
||||
// calculate the shift: the alternative position for vertical performance scaling
|
||||
double shift = Math.floor(dhtPositionDouble(urlHash) * partitions) / partitions;
|
||||
// the final position is the primary, normalized position plus the shift
|
||||
double p = primary - normalization + shift;
|
||||
// one of the possible shift positions points to the original dht position:
|
||||
// this is where the shift is equal to the normalization, when
|
||||
// Math.floor(dhtPosition(wordHash) * partitions) == Math.floor(dhtPosition(urlHash) * partitions)
|
||||
assert p < 1.0 : "p = " + p; // because of the normalization never an overflow should occur
|
||||
assert p >= 0.0 : "p = " + p;
|
||||
return (p < 1.0) ? p : p - 1.0;
|
||||
}
|
||||
|
||||
public final static long dhtPosition(final String wordHash, final String urlHash, final int e) {
|
||||
public final static long dhtPosition(final String wordHash, final String urlHash, final int partitionExponent) {
|
||||
// this creates 2^partitionExponent different positions for the same word hash (according to url hash)
|
||||
assert wordHash != null;
|
||||
assert urlHash != null;
|
||||
if (urlHash == null || e < 1) return dhtPosition(wordHash);
|
||||
if (urlHash == null || partitionExponent < 1) return dhtPosition(wordHash);
|
||||
// the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
|
||||
assert e > 0;
|
||||
long partitionMask = (1L << (Long.SIZE - 1 - e)) - 1L;
|
||||
assert partitionExponent > 0;
|
||||
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
|
||||
// compute the position using a specific fragment of the word hash and the url hash:
|
||||
// - from the word hash take the (63 - <partitionExponent>) lower bits
|
||||
// - from the url hash take the <partitionExponent> higher bits
|
||||
// in case that the partitionExponent is 1, only one bit is taken from the urlHash,
|
||||
// which means that the partition is in two parts.
|
||||
// With partitionExponent = 2 it is divided in four parts and so on.
|
||||
return (dhtPosition(wordHash) & partitionMask) | (dhtPosition(urlHash) & ~partitionMask);
|
||||
}
|
||||
|
||||
public final static long dhtPosition(final String wordHash, final int verticalPosition, final int partitionExponent) {
|
||||
assert wordHash != null;
|
||||
if (partitionExponent == 0) return dhtPosition(wordHash);
|
||||
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
|
||||
long verticalMask = verticalPosition << (Long.SIZE - 1 - partitionExponent);
|
||||
return (dhtPosition(wordHash) & partitionMask) | verticalMask;
|
||||
}
|
||||
|
||||
public final static int verticalPosition(final String urlHash, final int partitionExponent) {
|
||||
assert urlHash != null;
|
||||
if (urlHash == null || partitionExponent < 1) return 0;
|
||||
assert partitionExponent > 0;
|
||||
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
|
||||
return (int) (dhtPosition(urlHash) & ~partitionMask) >> (Long.SIZE - 1 - partitionExponent);
|
||||
}
|
||||
|
||||
/**
|
||||
* compute all vertical DHT positions for a given word
|
||||
* This is used when a word is searched and the peers holding the word must be computed
|
||||
* @param wordHash, the hash of the word
|
||||
* @param partitions, the number of partitions of the DHT
|
||||
* @return a vector of long values, the possible DHT positions
|
||||
*/
|
||||
private final static double[] dhtPositionsDouble(final String wordHash, final int e) {
|
||||
public final static long[] dhtPositions(final String wordHash, final int partitionExponent) {
|
||||
assert wordHash != null;
|
||||
int partitions = 1 << e;
|
||||
double[] d = new double[partitions];
|
||||
double primary = dhtPositionDouble(wordHash);
|
||||
double partitionSize = 1.0 / (double) partitions;
|
||||
d[0] = primary - Math.floor(primary * partitions) / partitions;
|
||||
for (int i = 1; i < partitions; i++) {
|
||||
d[i] = d[i - 1] + partitionSize;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
public final static long[] dhtPositions(final String wordHash, final int e) {
|
||||
assert wordHash != null;
|
||||
int partitions = 1 << e;
|
||||
int partitions = 1 << partitionExponent;
|
||||
long[] l = new long[partitions];
|
||||
long partitionSize = 1L << (Long.SIZE - 1 - e);
|
||||
long partitionSize = 1L << (Long.SIZE - 1 - partitionExponent);
|
||||
l[0] = dhtPosition(wordHash) & (partitionSize - 1L);
|
||||
for (int i = 1; i < partitions; i++) {
|
||||
l[i] = l[i - 1] + partitionSize;
|
||||
|
@ -1027,12 +1008,12 @@ public class yacySeed implements Cloneable {
|
|||
private static int verifiedOwn = 0;
|
||||
private static int verifiedNotOwn = 0;
|
||||
|
||||
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash, int redundancy) {
|
||||
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash, int redundancy, int partitionExponent) {
|
||||
// the guessIfOwnWord is a fast method that should only fail in case that a 'true' may be incorrect, but a 'false' shall always be correct
|
||||
if (guessIfOwnWord(seedDB, wordhash)) {
|
||||
if (guessIfOwnWord(seedDB, wordhash, partitionExponent)) {
|
||||
// this case must be verified, because it can be wrong.
|
||||
guessedOwn++;
|
||||
if (yacyPeerSelection.verifyIfOwnWord(seedDB, wordhash, redundancy)) {
|
||||
if (yacyPeerSelection.verifyIfOwnWord(seedDB, wordhash, redundancy, partitionExponent)) {
|
||||
// this is the correct case, but does not need to be an average case
|
||||
verifiedOwn++;
|
||||
//System.out.println("*** DEBUG shallBeOwnWord: true. guessed: true. verified/guessed ration = " + verifiedOwn + "/" + guessedOwn);
|
||||
|
@ -1064,12 +1045,11 @@ public class yacySeed implements Cloneable {
|
|||
|
||||
}
|
||||
|
||||
private static boolean guessIfOwnWord(final yacySeedDB seedDB, final String wordhash) {
|
||||
private static boolean guessIfOwnWord(final yacySeedDB seedDB, final String wordhash, int partitionExponent) {
|
||||
if (seedDB == null) return false;
|
||||
if (seedDB.mySeed().isPotential()) return false;
|
||||
int connected = seedDB.sizeConnected();
|
||||
if (connected == 0) return true;
|
||||
final long[] targets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
|
||||
final long[] targets = yacySeed.dhtPositions(wordhash, partitionExponent);
|
||||
final long mypos = yacySeed.dhtPosition(seedDB.mySeed().hash);
|
||||
for (int i = 0; i < targets.length; i++) {
|
||||
long distance = yacySeed.dhtDistance(targets[i], mypos);
|
||||
|
@ -1084,35 +1064,37 @@ public class yacySeed implements Cloneable {
|
|||
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3
|
||||
// test the DHT position calculation
|
||||
String wordHash = args[0];
|
||||
double dhtd;
|
||||
//double dhtd;
|
||||
long dhtl;
|
||||
int partitionExponent = 0;
|
||||
if (args.length == 3) {
|
||||
// the horizontal and vertical position calculation
|
||||
String urlHash = args[1];
|
||||
partitionExponent = Integer.parseInt(args[2]);
|
||||
dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
|
||||
//dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
|
||||
dhtl = dhtPosition(wordHash, urlHash, partitionExponent);
|
||||
} else {
|
||||
// only a horizontal position calculation
|
||||
dhtd = dhtPositionDouble(wordHash);
|
||||
//dhtd = dhtPositionDouble(wordHash);
|
||||
dhtl = dhtPosition(wordHash);
|
||||
}
|
||||
System.out.println("DHT Double = " + dhtd);
|
||||
//System.out.println("DHT Double = " + dhtd);
|
||||
System.out.println("DHT Long = " + dhtl);
|
||||
System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE));
|
||||
System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
|
||||
System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
|
||||
//System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
|
||||
//System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
|
||||
System.out.println("DHT as b64 from Long = " + positionToHash(dhtl));
|
||||
|
||||
System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: ");
|
||||
/*
|
||||
|
||||
double[] d = dhtPositionsDouble(wordHash, partitionExponent);
|
||||
for (int i = 0; i < d.length; i++) {
|
||||
if (i > 0) System.out.print(", ");
|
||||
System.out.print(positionToHash(d[i]));
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
*/
|
||||
System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : ");
|
||||
long[] l = dhtPositions(wordHash, partitionExponent);
|
||||
for (int i = 0; i < l.length; i++) {
|
||||
|
|
|
@ -91,6 +91,9 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
|
|||
public long lastSeedUpload_timeStamp = System.currentTimeMillis();
|
||||
public String lastSeedUpload_myIP = "";
|
||||
|
||||
public int netRedundancy;
|
||||
public int partitionExponent;
|
||||
|
||||
private yacySeed mySeed; // my own seed
|
||||
|
||||
private final Hashtable<String, yacySeed> nameLookupCache;
|
||||
|
@ -100,12 +103,16 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
|
|||
final File seedActiveDBFile,
|
||||
final File seedPassiveDBFile,
|
||||
final File seedPotentialDBFile,
|
||||
final File myOwnSeedFile) {
|
||||
final File myOwnSeedFile,
|
||||
final int redundancy,
|
||||
final int partitionExponent) {
|
||||
this.seedActiveDBFile = seedActiveDBFile;
|
||||
this.seedPassiveDBFile = seedPassiveDBFile;
|
||||
this.seedPotentialDBFile = seedPotentialDBFile;
|
||||
this.mySeed = null; // my own seed
|
||||
this.myOwnSeedFile = myOwnSeedFile;
|
||||
this.netRedundancy = redundancy;
|
||||
this.partitionExponent = partitionExponent;
|
||||
|
||||
// set up seed database
|
||||
seedActiveDB = openSeedTable(seedActiveDBFile);
|
||||
|
|
|
@ -674,7 +674,7 @@ public final class yacy {
|
|||
final int cacheMem = (int)(serverMemory.max() - serverMemory.total());
|
||||
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
|
||||
|
||||
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1);
|
||||
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
|
||||
final Iterator<indexContainer> indexContainerIterator = wordIndex.wordContainers("AAAAAAAAAAAA", false, false);
|
||||
|
||||
long urlCounter = 0, wordCounter = 0;
|
||||
|
@ -865,7 +865,7 @@ public final class yacy {
|
|||
try {
|
||||
Iterator<indexContainer> indexContainerIterator = null;
|
||||
if (resource.equals("all")) {
|
||||
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1);
|
||||
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
|
||||
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, false, false);
|
||||
}
|
||||
int counter = 0;
|
||||
|
|
Loading…
Reference in New Issue
Block a user