added more methods to control the vertical DHT (not yet active)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5514 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 2009-01-23 15:32:27 +00:00
parent 4ef6b15eb8
commit 419469ac27
13 changed files with 169 additions and 119 deletions

View File

@ -14,6 +14,7 @@ network.unit.search.time = 4
network.unit.dht = true
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 3
network.unit.dht.partitionExponent = 1
network.unit.remotecrawl.speed = 6
network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt
network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt

View File

@ -13,6 +13,7 @@ network.unit.domain = local
network.unit.dht = false
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 1
network.unit.dht.partitionExponent = 0
network.unit.remotecrawl.speed = 600
# each network may use different yacy distributions.

View File

@ -32,6 +32,7 @@
# network.unit.description = <any string, just informal; appears in network graphic>
# network.unit.domain = 'global'|'local'|'any'
# network.unit.dhtredundancy = <integer number, 0 means no DHT enabled>
# network.unit.dht.partitionExponent = <integer number, 0 means no DHT partition, 1 is a partition in two, 2 is a partition in four and so on>
# network.unit.bootstrap.seedlist<n> = <a URL to a seed-list file, which is stored by a principal peer>
# network.unit.protocol.control = 'uncontrolled'|'moderated'|'controlled'
# network.unit.protocol.request.authentication.method = 'salted-magic'
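For orientation, a minimal sketch (not part of the commit) of what this exponent means: the DHT is split vertically into 2^partitionExponent partitions.

// Illustration only: how network.unit.dht.partitionExponent maps to the
// number of vertical DHT partitions.
public class PartitionExponentDemo {
    public static void main(String[] args) {
        for (int exponent = 0; exponent <= 3; exponent++) {
            int partitions = 1 << exponent; // 2^exponent partitions
            System.out.println("partitionExponent = " + exponent + " -> " + partitions + " partition(s)");
        }
    }
}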

View File

@ -10,6 +10,7 @@ network.unit.search.time = 4
network.unit.dht = false
network.unit.dhtredundancy.junior = 1
network.unit.dhtredundancy.senior = 1
network.unit.dht.partitionExponent = 0
network.unit.remotecrawl.speed = 1
# each network may use different yacy distributions.

View File

@ -39,6 +39,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroDigest;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyPeerSelection;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
@ -59,6 +60,7 @@ public class plasmaDHTChunk {
private indexContainer[] indexContainers = null;
private HashMap<String, indexURLReference> urlCache; // String (url-hash) / plasmaCrawlLURL.Entry
private int idxCount;
private ArrayList<yacySeed> targets;
private long selectionStartTime = 0;
private long selectionEndTime = 0;
@ -85,6 +87,10 @@ public class plasmaDHTChunk {
return this.idxCount;
}
public ArrayList<yacySeed> targets() {
return this.targets;
}
private int indexCounter() {
int c = 0;
for (int i = 0; i < indexContainers.length; i++) {
@ -105,17 +111,43 @@ public class plasmaDHTChunk {
return this.status;
}
public plasmaDHTChunk(final serverLog log, final plasmaWordIndex wordIndex, final int minCount, final int maxCount, final int maxtime, String startPointHash) {
public plasmaDHTChunk(
final serverLog log,
final plasmaWordIndex wordIndex,
final int minContainerCount,
final int maxContainerCount,
final int maxtime,
final String startPointHash) {
try {
this.log = log;
this.wordIndex = wordIndex;
this.startPointHash = startPointHash;
if (this.log.isFine()) log.logFine("Selected hash " + this.startPointHash + " as start point for index distribution, distance = " + yacySeed.dhtDistance(this.startPointHash, wordIndex.seedDB.mySeed()));
selectTransferContainers(this.startPointHash, minCount, maxCount, maxtime);
this.selectionStartTime = System.currentTimeMillis();
// find target peers for the containers
int peerCount = wordIndex.seedDB.netRedundancy * 3 + 1;
final Iterator<yacySeed> seedIter = yacyPeerSelection.getAcceptRemoteIndexSeeds(wordIndex.seedDB, this.startPointHash, peerCount, false);
this.targets = new ArrayList<yacySeed>();
while (seedIter.hasNext() && peerCount-- > 0) this.targets.add(seedIter.next());
// select the containers:
// select from RAM
final int refcountRAM = selectTransferContainersResource(this.startPointHash, true, maxContainerCount, maxtime);
if (refcountRAM >= minContainerCount) {
if (this.log.isFine()) log.logFine("DHT selection from RAM: " + refcountRAM + " entries");
} else {
// select from DB
final int refcountFile = selectTransferContainersResource(this.startPointHash, false, maxContainerCount, maxtime);
if (this.log.isFine()) log.logFine("DHT selection from FILE: " + refcountFile + " entries, RAM provided only " + refcountRAM + " entries");
}
this.selectionEndTime = System.currentTimeMillis();
// count the indexes; the count can be smaller than expected
this.idxCount = indexCounter();
if (this.idxCount < minCount) {
if (this.idxCount < minContainerCount) {
if (this.log.isFine()) log.logFine("Too few (" + this.idxCount + ") indexes selected for transfer.");
this.status = chunkStatus_FAILED;
}
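Condensed, the selection pattern introduced here looks like the following sketch (not part of the commit; selectFrom() is a hypothetical stand-in for selectTransferContainersResource()): try the cheap RAM source first and touch the file index only when too few references were found.

// Sketch of the RAM-first, file-fallback selection pattern.
public class TwoStageSelection {
    static int selectFrom(boolean ram, int max) {
        return ram ? 10 : 100; // dummy reference counts for illustration
    }
    public static void main(String[] args) {
        int min = 50, max = 200;
        int count = selectFrom(true, max);               // cheap: RAM first
        if (count < min) count = selectFrom(false, max); // fallback: file index
        System.out.println("selected " + count + " references");
    }
}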
@ -125,31 +157,21 @@ public class plasmaDHTChunk {
}
public static String selectTransferStart() {
// a random start point. It is dangerous to take a computed start point, because that could flood specific target peers:
// the community of all peers would use the same method for target computation. It is better to just use a random start point.
return kelondroBase64Order.enhancedCoder.encode(kelondroDigest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + yacySeedDB.commonHashLength);
}
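The same idea as a standalone sketch (not part of the commit): standard MD5 and Base64 stand in for kelondroDigest and YaCy's kelondroBase64Order coder, and the hash length 12 is assumed to match yacySeedDB.commonHashLength.

// A time-seeded, effectively random start hash, so peers pick different start points.
import java.security.MessageDigest;
import java.util.Base64;

public class RandomStartPoint {
    public static void main(String[] args) throws Exception {
        byte[] md5 = MessageDigest.getInstance("MD5")
                .digest(Long.toString(System.currentTimeMillis()).getBytes());
        String encoded = Base64.getEncoder().withoutPadding().encodeToString(md5);
        System.out.println(encoded.substring(2, 2 + 12)); // 12 = assumed commonHashLength
    }
}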
private void selectTransferContainers(final String hash, final int mincount, final int maxcount, final int maxtime) throws InterruptedException {
try {
this.selectionStartTime = System.currentTimeMillis();
final int refcountRAM = selectTransferContainersResource(hash, true, maxcount, maxtime);
if (refcountRAM >= mincount) {
if (this.log.isFine()) log.logFine("DHT selection from RAM: " + refcountRAM + " entries");
return;
}
final int refcountFile = selectTransferContainersResource(hash, false, maxcount, maxtime);
if (this.log.isFine()) log.logFine("DHT selection from FILE: " + refcountFile + " entries, RAM provided only " + refcountRAM + " entries");
return;
} finally {
this.selectionEndTime = System.currentTimeMillis();
}
}
private int selectTransferContainersResource(final String hash, final boolean ram, final int maxcount, final int maxtime) throws InterruptedException {
private int selectTransferContainersResource(final String hash, final boolean ram, final int maxContainerCount, final int maxtime) throws InterruptedException {
// if (maxcount > 500) { maxcount = 500; } // flooding & OOM reduce
// the hash is a start hash from where the indexes are picked
final ArrayList<indexContainer> tmpContainers = new ArrayList<indexContainer>(maxcount);
// the peer hash of the first peer is the upper limit for the collection
String limitHash = this.targets.get(0).hash;
final ArrayList<indexContainer> tmpContainers = new ArrayList<indexContainer>(maxContainerCount);
try {
final Iterator<indexContainer> indexContainerIterator = wordIndex.indexContainerSet(hash, ram, true, maxcount).iterator();
final Iterator<indexContainer> indexContainerIterator = wordIndex.indexContainerSet(hash, ram, true, maxContainerCount).iterator();
indexContainer container;
Iterator<indexRWIRowEntry> urlIter;
indexRWIRowEntry iEntry;
@ -158,15 +180,14 @@ public class plasmaDHTChunk {
int wholesize;
urlCache = new HashMap<String, indexURLReference>();
final long maximumDistanceLong = Long.MAX_VALUE / wordIndex.seedDB.sizeConnected() * wordIndex.netRedundancy * 2;
final long timeout = (maxtime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
while (
(maxcount > refcount) &&
(maxContainerCount > refcount) &&
(indexContainerIterator.hasNext()) &&
((container = indexContainerIterator.next()) != null) &&
(container.size() > 0) &&
((tmpContainers.size() == 0) ||
(Math.abs(yacySeed.dhtPosition(container.getWordHash()) - yacySeed.dhtPosition(tmpContainers.get(0).getWordHash())) < maximumDistanceLong)) &&
(kelondroBase64Order.enhancedComparator.compare(container.getWordHash(), limitHash) < 0) &&
(System.currentTimeMillis() < timeout)
) {
// check for interruption
@ -178,7 +199,7 @@ public class plasmaDHTChunk {
wholesize = container.size();
urlIter = container.entries();
// iterate over indexes to fetch url entries and store them in the urlCache
while ((urlIter.hasNext()) && (maxcount > refcount) && (System.currentTimeMillis() < timeout)) {
while ((urlIter.hasNext()) && (maxContainerCount > refcount) && (System.currentTimeMillis() < timeout)) {
// CPU & IO reduce
// try { Thread.sleep(50); } catch (InterruptedException e) { }
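The new stop condition can be isolated in a small sketch (not part of the commit; String.compareTo stands in for kelondroBase64Order.enhancedComparator): containers are collected in hash order only while their word hash sorts below the first target peer's hash.

import java.util.Arrays;
import java.util.List;

public class LimitHashSelection {
    public static void main(String[] args) {
        List<String> wordHashes = Arrays.asList("AAAA", "BBBB", "CCCC", "DDDD");
        String limitHash = "CCCC"; // hash of the first target peer
        for (String h : wordHashes) {
            if (h.compareTo(limitHash) >= 0) break; // upper limit reached
            System.out.println("select container for " + h);
        }
    }
}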

View File

@ -147,8 +147,11 @@ public class plasmaGrafics {
final Iterator<String> i = query.queryHashes.iterator();
eventPicture.setColor(ymageMatrix.GREY);
while (i.hasNext()) {
angle = (int) (360.0 * (((double) yacySeed.dhtPosition(i.next())) / ((double) Long.MAX_VALUE)));
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
long[] positions = yacySeed.dhtPositions(i.next(), seedDB.partitionExponent);
for (int j = 0; j < positions.length; j++) {
angle = (int) (360.0 * (((double) positions[j]) / ((double) Long.MAX_VALUE)));
eventPicture.arcLine(cx, cy, cr - 20, cr, angle);
}
}
return eventPicture;
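The angle mapping, isolated as a tiny sketch (not part of the commit): a DHT position in the range 0 .. Long.MAX_VALUE scales linearly to 0 .. 360 degrees, and with vertical partitions each word now contributes one ray per position.

public class DhtAngle {
    public static void main(String[] args) {
        long position = Long.MAX_VALUE / 4; // example DHT position
        int angle = (int) (360.0 * ((double) position / (double) Long.MAX_VALUE));
        System.out.println("angle = " + angle); // prints 90
    }
}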

View File

@ -171,7 +171,6 @@ import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacyNewsPool;
import de.anomic.yacy.yacyNewsRecord;
import de.anomic.yacy.yacyPeerSelection;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyTray;
import de.anomic.yacy.yacyURL;
@ -308,7 +307,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, "");
final boolean useCommons = getConfigBool("index.storeCommons", false);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
webIndex = new plasmaWordIndex(networkName, log, indexPrimaryPath, indexSecondaryPath, wordCacheMaxCount, useCommons, redundancy);
final int partitionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
webIndex = new plasmaWordIndex(
networkName,
log,
indexPrimaryPath,
indexSecondaryPath,
wordCacheMaxCount,
useCommons,
redundancy,
partitionExponent);
crawlResults = new ResultURLs();
// start yacy core
@ -746,7 +754,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
final boolean useCommons = getConfigBool("index.storeCommons", false);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
this.webIndex = new plasmaWordIndex(getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""), getLog(), indexPrimaryPath, indexSecondaryPath, wordCacheMaxCount, useCommons, redundancy);
final int partitionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
this.webIndex = new plasmaWordIndex(
getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""),
getLog(),
indexPrimaryPath,
indexSecondaryPath,
wordCacheMaxCount,
useCommons,
redundancy,
partitionExponent);
// we need a new stacker, because this uses network-specific attributes to sort out urls (local, global)
this.crawlStacker = new CrawlStacker(
crawlQueues,
@ -1992,7 +2009,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
try {
// find a list of DHT-peers
if (log != null) log.logInfo("Collecting DHT target peers for first_hash = " + dhtChunk.firstContainer().getWordHash() + ", last_hash = " + dhtChunk.lastContainer().getWordHash());
final Iterator<yacySeed> seedIter = yacyPeerSelection.getAcceptRemoteIndexSeeds(webIndex.seedDB, dhtChunk.lastContainer().getWordHash(), peerCount + 9, false);
final Iterator<yacySeed> seedIter = dhtChunk.targets().iterator();
// send away the indexes to all these peers
int hc1 = 0;

View File

@ -113,15 +113,21 @@ public final class plasmaWordIndex implements indexRI {
public CrawlProfile.entry defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
private final File queuesRoot;
public yacyPeerActions peerActions;
public int netRedundancy;
public plasmaWordIndex(final String networkName, final serverLog log, final File indexPrimaryRoot, final File indexSecondaryRoot, final int entityCacheMaxSize, boolean useCommons, int redundancy) {
public plasmaWordIndex(
final String networkName,
final serverLog log,
final File indexPrimaryRoot,
final File indexSecondaryRoot,
final int entityCacheMaxSize,
final boolean useCommons,
final int redundancy,
final int partitionExponent) {
if (networkName == null || networkName.length() == 0) {
log.logSevere("no network name given - shutting down");
System.exit(0);
}
this.log = log;
this.netRedundancy = redundancy;
this.primaryRoot = new File(indexPrimaryRoot, networkName);
this.secondaryRoot = new File(indexSecondaryRoot, networkName);
File indexPrimaryTextLocation = new File(this.primaryRoot, "TEXT");
@ -229,7 +235,9 @@ public final class plasmaWordIndex implements indexRI {
new File(networkRoot, "seed.new.heap"),
new File(networkRoot, "seed.old.heap"),
new File(networkRoot, "seed.pot.heap"),
mySeedFile
mySeedFile,
redundancy,
partitionExponent
);
// create or init news database

View File

@ -42,9 +42,9 @@ import de.anomic.server.logging.serverLog;
public class yacyPeerSelection {
public static void selectDHTPositions(final yacySeedDB seedDB, String wordhash, int redundancy, HashMap<String, yacySeed> regularSeeds, kelondroMScoreCluster<String> ranking) {
public static void selectDHTPositions(final yacySeedDB seedDB, String wordhash, int redundancy, int partitionExponent, HashMap<String, yacySeed> regularSeeds, kelondroMScoreCluster<String> ranking) {
// this method is called from the search target computation
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, partitionExponent);
yacySeed seed;
long distance;
for (int v = 0; v < dhtVerticalTargets.length; v++) {
@ -65,16 +65,16 @@ public class yacyPeerSelection {
}
}
public static boolean verifyIfOwnWord(final yacySeedDB seedDB, String wordhash, int redundancy) {
public static boolean verifyIfOwnWord(final yacySeedDB seedDB, String wordhash, int redundancy, int partitionExponent) {
String myHash = seedDB.mySeed().hash;
//long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
//for (int v = 0; v < dhtVerticalTargets.length; v++) {
//wordhash = yacySeed.positionToHash(dhtVerticalTargets[0]);
long[] dhtVerticalTargets = yacySeed.dhtPositions(wordhash, partitionExponent);
for (int v = 0; v < dhtVerticalTargets.length; v++) {
wordhash = yacySeed.positionToHash(dhtVerticalTargets[v]);
Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true);
while (dhtEnum.hasNext()) {
if (dhtEnum.next().hash.equals(myHash)) return true;
}
//}
}
return false;
}
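The activated loop boils down to this sketch (not part of the commit; lookupPeers() is a hypothetical stand-in for getAcceptRemoteIndexSeeds()): a word is "own" if my peer hash is among the peers responsible for any of the word's vertical DHT positions.

import java.util.Arrays;
import java.util.List;

public class OwnWordCheck {
    static List<String> lookupPeers(long position) {
        return Arrays.asList("peerA", "peerB"); // dummy responsible peers
    }
    public static void main(String[] args) {
        String myHash = "peerB";
        long[] verticalTargets = {1000L, 2000L}; // dummy vertical positions
        boolean own = false;
        for (long target : verticalTargets) {
            if (lookupPeers(target).contains(myHash)) { own = true; break; }
        }
        System.out.println("own word: " + own);
    }
}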

View File

@ -169,7 +169,7 @@ public class yacySearch extends Thread {
//return (yacySeed[]) l.toArray();
}
private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final Set<String> wordhashes, int seedcount, int redundancy) {
private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final Set<String> wordhashes, int seedcount, int redundancy, int partitionExponent) {
// find out a specific number of seeds that would be relevant for the given word hash(es)
// the result is ordered by relevance: [0] is most relevant
// the seedcount is the maximum number of wanted results
@ -186,7 +186,7 @@ public class yacySearch extends Thread {
Iterator<yacySeed> dhtEnum;
Iterator<String> iter = wordhashes.iterator();
while (iter.hasNext()) {
yacyPeerSelection.selectDHTPositions(seedDB, iter.next(), redundancy, regularSeeds, ranking);
yacyPeerSelection.selectDHTPositions(seedDB, iter.next(), redundancy, partitionExponent, regularSeeds, ranking);
}
// put in seeds according to size of peer
@ -262,7 +262,15 @@ public class yacySearch extends Thread {
// prepare seed targets and threads
assert language != null;
final yacySeed[] targetPeers = (clusterselection == null) ? selectSearchTargets(wordIndex.seedDB, plasmaSearchQuery.hashes2Set(wordhashes), targets, wordIndex.netRedundancy) : selectClusterPeers(wordIndex.seedDB, clusterselection);
final yacySeed[] targetPeers =
(clusterselection == null) ?
selectSearchTargets(
wordIndex.seedDB,
plasmaSearchQuery.hashes2Set(wordhashes),
targets,
wordIndex.seedDB.netRedundancy,
wordIndex.seedDB.partitionExponent)
: selectClusterPeers(wordIndex.seedDB, clusterselection);
if (targetPeers == null) return new yacySearch[0];
targets = targetPeers.length;
if (targets == 0) return new yacySearch[0];
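A back-of-the-envelope note (an inference from the loop in selectDHTPositions, not a formula stated in the commit): each word now has 2^partitionExponent vertical positions and each position is covered by `redundancy` peers, so the candidate search-target count scales with both factors.

public class TargetCountEstimate {
    public static void main(String[] args) {
        int redundancy = 3, partitionExponent = 1;
        int candidates = redundancy * (1 << partitionExponent); // rough upper bound
        System.out.println("candidate target peers per word ~ " + candidates);
    }
}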

View File

@ -68,7 +68,6 @@ import de.anomic.tools.crypt;
public class yacySeed implements Cloneable {
public static final int partitionExponent = 1;
public static final int maxsize = 4096;
/**
* <b>substance</b> "sI" (send index/words)
@ -676,20 +675,18 @@ public class yacySeed implements Cloneable {
return type.equals(yacySeed.PEERTYPE_SENIOR) || type.equals(yacySeed.PEERTYPE_PRINCIPAL);
}
/**
* @deprecated this does not reflect the vertical DHT. A peer may have several positions now.
*/
public final long dhtPosition() {
// normalized to Long.MAX_VALUE
return dhtPosition(this.hash);
}
private final static double dhtPositionDouble(final String wordHash) {
// normalized to 1.0
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
assert c != Long.MAX_VALUE;
if (c == Long.MAX_VALUE) return 0.999999999999;
return ((double) c) / ((double) Long.MAX_VALUE);
}
public final static long dhtPosition(final String wordHash) {
/**
* @deprecated use dhtPosition(wordHash, urlHash, partitionExponent) instead
*/
private final static long dhtPosition(final String wordHash) {
// normalized to Long.MAX_VALUE
long c = kelondroBase64Order.enhancedCoder.cardinal(wordHash.getBytes());
assert c != Long.MAX_VALUE;
@ -712,67 +709,51 @@ public class yacySeed implements Cloneable {
* @param urlHash, the hash of a reference
* @return a double in the range 0 .. 1.0 (including 0, excluding 1.0), the DHT position
*/
private final static double dhtPositionDouble(final String wordHash, final String urlHash, final int e) {
assert wordHash != null;
assert urlHash != null;
if (urlHash == null) return dhtPositionDouble(wordHash);
// calculate the primary DHT position:
// this is done first using the 'classic' dht position and then
// calculating an alternative 'first' position considering the partition size
// because of the number of partitions, the 'original' position is reached as one of the
// alternative dht positions within the partitions
double primary = dhtPositionDouble(wordHash); // the hash position for horizontal performance scaling
// the number of partitions is 2 ** e, the partitions may grow exponentially (every time it is doubled)
double partitions = (double) (1L << e);
// modification of the primary position using the partitions to create a normalization:
double normalization = Math.floor(primary * partitions) / partitions;
// calculate the shift: the alternative position for vertical performance scaling
double shift = Math.floor(dhtPositionDouble(urlHash) * partitions) / partitions;
// the final position is the primary, normalized position plus the shift
double p = primary - normalization + shift;
// one of the possible shift positions points to the original dht position:
// this is where the shift is equal to the normalization, when
// Math.floor(dhtPosition(wordHash) * partitions) == Math.floor(dhtPosition(urlHash) * partitions)
assert p < 1.0 : "p = " + p; // because of the normalization, an overflow should never occur
assert p >= 0.0 : "p = " + p;
return (p < 1.0) ? p : p - 1.0;
}
public final static long dhtPosition(final String wordHash, final String urlHash, final int e) {
public final static long dhtPosition(final String wordHash, final String urlHash, final int partitionExponent) {
// this creates 2^partitionExponent different positions for the same word hash (according to the url hash)
assert wordHash != null;
assert urlHash != null;
if (urlHash == null || e < 1) return dhtPosition(wordHash);
if (urlHash == null || partitionExponent < 1) return dhtPosition(wordHash);
// the partition size is (Long.MAX + 1) / 2 ** e == 2 ** (63 - e)
assert e > 0;
long partitionMask = (1L << (Long.SIZE - 1 - e)) - 1L;
assert partitionExponent > 0;
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
// compute the position using a specific fragment of the word hash and the url hash:
// - from the word hash take the (63 - <partitionExponent>) lower bits
// - from the url hash take the (63 - <partitionExponent>) higher bits
// if the partitionExponent is 1, only one bit is taken from the urlHash,
// which splits the DHT into two partitions.
// With partitionExponent = 2 it is divided into four parts, and so on.
return (dhtPosition(wordHash) & partitionMask) | (dhtPosition(urlHash) & ~partitionMask);
}
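A worked example of the mask composition (not part of the commit; the position values are dummies): for partitionExponent = 1, the low 62 bits of the result come from the word hash position and the remaining high bit(s) from the url hash position.

public class DhtComposition {
    public static void main(String[] args) {
        int e = 1;
        long partitionMask = (1L << (Long.SIZE - 1 - e)) - 1L; // low 62 bits
        long wordPos = 0x1234_5678_9ABC_DEF0L; // dummy word hash position
        long urlPos  = 0x7FFF_0000_0000_0000L; // dummy url hash position
        long position = (wordPos & partitionMask) | (urlPos & ~partitionMask);
        System.out.println(Long.toBinaryString(position));
    }
}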
public final static long dhtPosition(final String wordHash, final int verticalPosition, final int partitionExponent) {
assert wordHash != null;
if (partitionExponent == 0) return dhtPosition(wordHash);
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
long verticalMask = ((long) verticalPosition) << (Long.SIZE - 1 - partitionExponent); // cast to long first: an int shift count is taken modulo 32
return (dhtPosition(wordHash) & partitionMask) | verticalMask;
}
public final static int verticalPosition(final String urlHash, final int partitionExponent) {
assert urlHash != null;
if (urlHash == null || partitionExponent < 1) return 0;
assert partitionExponent > 0;
long partitionMask = (1L << (Long.SIZE - 1 - partitionExponent)) - 1L;
return (int) ((dhtPosition(urlHash) & ~partitionMask) >> (Long.SIZE - 1 - partitionExponent)); // shift before narrowing to int, otherwise the high bits are lost
}
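A sketch tying verticalPosition() and dhtPositions() together (not part of the commit; inputs are dummies): enumerate all 2^e vertical positions of a word and recover the partition index from each one.

public class VerticalPositions {
    public static void main(String[] args) {
        int e = 2;
        long partitionSize = 1L << (Long.SIZE - 1 - e); // 2^(63-e)
        long wordPos = 12345L; // dummy base position inside partition 0
        for (int v = 0; v < (1 << e); v++) {
            long pos = (wordPos & (partitionSize - 1L)) + v * partitionSize;
            int vertical = (int) (pos >> (Long.SIZE - 1 - e)); // recovers v
            System.out.println("position " + pos + " -> partition " + vertical);
        }
    }
}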
/**
* compute all vertical DHT positions for a given word
* This is used when a word is searched and the peers holding the word must be computed
* @param wordHash, the hash of the word
* @param partitionExponent, the partition exponent; the DHT is divided into 2^partitionExponent partitions
* @return a vector of long values, the possible DHT positions
*/
private final static double[] dhtPositionsDouble(final String wordHash, final int e) {
public final static long[] dhtPositions(final String wordHash, final int partitionExponent) {
assert wordHash != null;
int partitions = 1 << e;
double[] d = new double[partitions];
double primary = dhtPositionDouble(wordHash);
double partitionSize = 1.0 / (double) partitions;
d[0] = primary - Math.floor(primary * partitions) / partitions;
for (int i = 1; i < partitions; i++) {
d[i] = d[i - 1] + partitionSize;
}
return d;
}
public final static long[] dhtPositions(final String wordHash, final int e) {
assert wordHash != null;
int partitions = 1 << e;
int partitions = 1 << partitionExponent;
long[] l = new long[partitions];
long partitionSize = 1L << (Long.SIZE - 1 - e);
long partitionSize = 1L << (Long.SIZE - 1 - partitionExponent);
l[0] = dhtPosition(wordHash) & (partitionSize - 1L);
for (int i = 1; i < partitions; i++) {
l[i] = l[i - 1] + partitionSize;
@ -1027,12 +1008,12 @@ public class yacySeed implements Cloneable {
private static int verifiedOwn = 0;
private static int verifiedNotOwn = 0;
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash, int redundancy) {
public static boolean shallBeOwnWord(final yacySeedDB seedDB, final String wordhash, int redundancy, int partitionExponent) {
// guessIfOwnWord is a fast check: a 'true' result may be incorrect and must be verified, but a 'false' result is always correct
if (guessIfOwnWord(seedDB, wordhash)) {
if (guessIfOwnWord(seedDB, wordhash, partitionExponent)) {
// this case must be verified, because it can be wrong.
guessedOwn++;
if (yacyPeerSelection.verifyIfOwnWord(seedDB, wordhash, redundancy)) {
if (yacyPeerSelection.verifyIfOwnWord(seedDB, wordhash, redundancy, partitionExponent)) {
// this is the correct case, but does not need to be an average case
verifiedOwn++;
//System.out.println("*** DEBUG shallBeOwnWord: true. guessed: true. verified/guessed ration = " + verifiedOwn + "/" + guessedOwn);
@ -1064,12 +1045,11 @@ public class yacySeed implements Cloneable {
}
private static boolean guessIfOwnWord(final yacySeedDB seedDB, final String wordhash) {
private static boolean guessIfOwnWord(final yacySeedDB seedDB, final String wordhash, int partitionExponent) {
if (seedDB == null) return false;
if (seedDB.mySeed().isPotential()) return false;
int connected = seedDB.sizeConnected();
if (connected == 0) return true;
final long[] targets = yacySeed.dhtPositions(wordhash, yacySeed.partitionExponent);
final long[] targets = yacySeed.dhtPositions(wordhash, partitionExponent);
final long mypos = yacySeed.dhtPosition(seedDB.mySeed().hash);
for (int i = 0; i < targets.length; i++) {
long distance = yacySeed.dhtDistance(targets[i], mypos);
@ -1084,35 +1064,37 @@ public class yacySeed implements Cloneable {
// java -classpath classes de.anomic.yacy.yacySeed hHJBztzcFG76 M8hgtrHG6g12 3
// test the DHT position calculation
String wordHash = args[0];
double dhtd;
//double dhtd;
long dhtl;
int partitionExponent = 0;
if (args.length == 3) {
// the horizontal and vertical position calculation
String urlHash = args[1];
partitionExponent = Integer.parseInt(args[2]);
dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
//dhtd = dhtPositionDouble(wordHash, urlHash, partitionExponent);
dhtl = dhtPosition(wordHash, urlHash, partitionExponent);
} else {
// only a horizontal position calculation
dhtd = dhtPositionDouble(wordHash);
//dhtd = dhtPositionDouble(wordHash);
dhtl = dhtPosition(wordHash);
}
System.out.println("DHT Double = " + dhtd);
//System.out.println("DHT Double = " + dhtd);
System.out.println("DHT Long = " + dhtl);
System.out.println("DHT as Double from Long = " + ((double) dhtl) / ((double) Long.MAX_VALUE));
System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
//System.out.println("DHT as Long from Double = " + (long) (Long.MAX_VALUE * dhtd));
//System.out.println("DHT as b64 from Double = " + positionToHash(dhtd));
System.out.println("DHT as b64 from Long = " + positionToHash(dhtl));
System.out.print("all " + (1 << partitionExponent) + " DHT positions from doubles: ");
/*
double[] d = dhtPositionsDouble(wordHash, partitionExponent);
for (int i = 0; i < d.length; i++) {
if (i > 0) System.out.print(", ");
System.out.print(positionToHash(d[i]));
}
System.out.println();
*/
System.out.print("all " + (1 << partitionExponent) + " DHT positions from long : ");
long[] l = dhtPositions(wordHash, partitionExponent);
for (int i = 0; i < l.length; i++) {

View File

@ -90,6 +90,9 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
public int lastSeedUpload_seedDBSize = 0;
public long lastSeedUpload_timeStamp = System.currentTimeMillis();
public String lastSeedUpload_myIP = "";
public int netRedundancy;
public int partitionExponent;
private yacySeed mySeed; // my own seed
@ -100,12 +103,16 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
final File seedActiveDBFile,
final File seedPassiveDBFile,
final File seedPotentialDBFile,
final File myOwnSeedFile) {
final File myOwnSeedFile,
final int redundancy,
final int partitionExponent) {
this.seedActiveDBFile = seedActiveDBFile;
this.seedPassiveDBFile = seedPassiveDBFile;
this.seedPotentialDBFile = seedPotentialDBFile;
this.mySeed = null; // my own seed
this.myOwnSeedFile = myOwnSeedFile;
this.netRedundancy = redundancy;
this.partitionExponent = partitionExponent;
// set up seed database
seedActiveDB = openSeedTable(seedActiveDBFile);

View File

@ -674,7 +674,7 @@ public final class yacy {
final int cacheMem = (int)(serverMemory.max() - serverMemory.total());
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1);
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
final Iterator<indexContainer> indexContainerIterator = wordIndex.wordContainers("AAAAAAAAAAAA", false, false);
long urlCounter = 0, wordCounter = 0;
@ -865,7 +865,7 @@ public final class yacy {
try {
Iterator<indexContainer> indexContainerIterator = null;
if (resource.equals("all")) {
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1);
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, false, false);
}
int counter = 0;