replaced the old 'kCache' with a fully controlled cache

there are now two fully controlled caches for index entries:
- dhtIn
- dhtOut
during indexing, all index entries that belong to the local peer itself,
and therefore shall not be transferred to remote peers, are stored in dhtIn.
It is furthermore ensured that received index entries are not retransmitted
to other peers right away; they may, however, be transmitted later if the
network grows.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2574 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 2006-09-14 00:51:02 +00:00
parent 6e2907135a
commit c89d8142bb
13 changed files with 207 additions and 245 deletions
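For orientation before the diffs: the routing decision this commit introduces (see the plasmaWordIndex changes below) can be condensed to the following sketch. It paraphrases the new addEntry from the diff rather than quoting it verbatim:

    public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
        // words that the own peer is responsible for are forced into the dhtIn case
        if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;

        if (dhtInCase) {
            // own-peer words: keep locally, never forward directly
            dhtInCache.addEntry(wordHash, entry, updateTime, true);
        } else {
            // foreign words: candidates for DHT transfer to remote peers
            dhtOutCache.addEntry(wordHash, entry, updateTime, false);
            flushControl();
        }
        return null;
    }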

View File

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4 javacTarget=1.4
# Release Configuration # Release Configuration
releaseVersion=0.463 releaseVersion=0.464
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

View File

@ -71,8 +71,8 @@
<table border="0" cellpadding="5" cellspacing="1"> <table border="0" cellpadding="5" cellspacing="1">
<tr valign="top" class="TableHeader"> <tr valign="top" class="TableHeader">
<td>Cache Type</td> <td>Cache Type</td>
<td>Indexing</td> <td>DHT-Out</td>
<td>DHT</td> <td>DHT-In</td>
<td>Description</td> <td>Description</td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
@ -96,7 +96,7 @@
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td>Maximum URLs currently assigned<br />to one cached word:</td> <td>Maximum URLs currently assigned<br />to one cached word:</td>
<td>#[maxURLinWCache]#</td> <td>#[maxURLinWCache]#</td>
<td>not controlled<br />for DHT cache</td> <td>#[maxURLinKCache]#</td>
<td> <td>
This is the maximum size of URLs assigned to a single word cache entry. This is the maximum size of URLs assigned to a single word cache entry.
If this is a big number, it shows that the caching works efficiently. If this is a big number, it shows that the caching works efficiently.

View File

@ -259,13 +259,14 @@ public class PerformanceQueues_p {
// table cache settings // table cache settings
prop.put("urlCacheSize", switchboard.urlPool.loadedURL.writeCacheSize()); prop.put("urlCacheSize", switchboard.urlPool.loadedURL.writeCacheSize());
prop.put("wordCacheWSize", switchboard.wordIndex.wSize()); prop.put("wordCacheWSize", switchboard.wordIndex.dhtOutCacheSize());
prop.put("wordCacheKSize", switchboard.wordIndex.kSize()); prop.put("wordCacheKSize", switchboard.wordIndex.dhtInCacheSize());
prop.put("maxURLinWCache", "" + switchboard.wordIndex.maxURLinWCache()); prop.put("maxURLinWCache", "" + switchboard.wordIndex.maxURLinDHTOutCache());
prop.put("maxAgeOfWCache", "" + (switchboard.wordIndex.maxAgeOfWCache() / 1000 / 60)); // minutes prop.put("maxURLinKCache", "" + switchboard.wordIndex.maxURLinDHTInCache());
prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfWCache() / 1000 / 60)); // minutes prop.put("maxAgeOfWCache", "" + (switchboard.wordIndex.maxAgeOfDHTOutCache() / 1000 / 60)); // minutes
prop.put("maxAgeOfKCache", "" + (switchboard.wordIndex.maxAgeOfKCache() / 1000 / 60)); // minutes prop.put("maxAgeOfKCache", "" + (switchboard.wordIndex.maxAgeOfDHTInCache() / 1000 / 60)); // minutes
prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfKCache() / 1000 / 60)); // minutes prop.put("minAgeOfWCache", "" + (switchboard.wordIndex.minAgeOfDHTOutCache() / 1000 / 60)); // minutes
prop.put("minAgeOfKCache", "" + (switchboard.wordIndex.minAgeOfDHTInCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180")); prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMaxCount", switchboard.getConfigLong("wordCacheMaxCount", 20000)); prop.put("wordCacheMaxCount", switchboard.getConfigLong("wordCacheMaxCount", 20000));
prop.put("wordCacheInitCount", switchboard.getConfigLong("wordCacheInitCount", 30000)); prop.put("wordCacheInitCount", switchboard.getConfigLong("wordCacheInitCount", 30000));

View File

@ -23,7 +23,7 @@
<input type="submit" name="Enter" value="Search" /> <input type="submit" name="Enter" value="Search" />
<input type="hidden" name="former" value="#[former]#" /> <input type="hidden" name="former" value="#[former]#" />
#(searchoptions)# #(searchoptions)#
<input type="hidden" name="count" value="20" /> <input type="hidden" name="count" value="10" />
<input type="hidden" name="order" value="Date-YBR-Quality" /> <input type="hidden" name="order" value="Date-YBR-Quality" />
<input type="hidden" name="resource" value="global" /> <input type="hidden" name="resource" value="global" />
<input type="hidden" name="time" value="6" /> <input type="hidden" name="time" value="6" />

View File

@ -103,8 +103,8 @@ public class index {
prop.put("combine", 0); prop.put("combine", 0);
prop.put("resultbottomline", 0); prop.put("resultbottomline", 0);
prop.put("searchoptions", searchoptions); prop.put("searchoptions", searchoptions);
prop.put("searchoptions_count-10", 0); prop.put("searchoptions_count-10", 1);
prop.put("searchoptions_count-50", 1); prop.put("searchoptions_count-50", 0);
prop.put("searchoptions_count-100", 0); prop.put("searchoptions_count-100", 0);
prop.put("searchoptions_count-1000", 0); prop.put("searchoptions_count-1000", 0);
prop.put("searchoptions_order-ybr-date-quality", plasmaSearchPreOrder.canUseYBR() ? 1 : 0); prop.put("searchoptions_order-ybr-date-quality", plasmaSearchPreOrder.canUseYBR() ? 1 : 0);

View File

@ -64,9 +64,9 @@ public class status_p {
prop.put("rejected", 0); prop.put("rejected", 0);
yacyCore.peerActions.updateMySeed(); yacyCore.peerActions.updateMySeed();
prop.put("ppm", yacyCore.seedDB.mySeed.get(yacySeed.ISPEED, "unknown")); prop.put("ppm", yacyCore.seedDB.mySeed.get(yacySeed.ISPEED, "unknown"));
prop.put("wordCacheSize", switchboard.wordIndex.wSize() + switchboard.wordIndex.kSize()); prop.put("wordCacheSize", switchboard.wordIndex.dhtOutCacheSize() + switchboard.wordIndex.dhtInCacheSize());
prop.put("wordCacheWSize", switchboard.wordIndex.wSize()); prop.put("wordCacheWSize", switchboard.wordIndex.dhtOutCacheSize());
prop.put("wordCacheKSize", switchboard.wordIndex.kSize()); prop.put("wordCacheKSize", switchboard.wordIndex.dhtInCacheSize());
prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000")); prop.put("wordCacheMaxCount", switchboard.getConfig("wordCacheMaxCount", "10000"));
// return rewrite properties // return rewrite properties

View File

@ -108,15 +108,15 @@ public final class transferRWI {
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted."); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". Not granted.");
result = "not_granted"; result = "not_granted";
pause = 0; pause = 0;
} else if (checkLimit && sb.wordIndex.kSize() > cachelimit) { } else if (checkLimit && sb.wordIndex.dhtInCacheSize() > cachelimit) {
// we are too busy to receive indexes // we are too busy to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.wordIndex.kSize() + ")."); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (buffersize=" + sb.wordIndex.dhtInCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush granted = false; // don't accept more words if there are too many words to flush
result = "busy"; result = "busy";
pause = 60000; pause = 60000;
} else if ((checkLimit && sb.wordIndex.wSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) { } else if ((checkLimit && sb.wordIndex.dhtOutCacheSize() > sb.getConfigLong("wordCacheMaxCount", 20000)) || ((sb.wordIndex.busyCacheFlush) && (!shortCacheFlush))) {
// we are too busy flushing the ramCache to receive indexes // we are too busy flushing the ramCache to receive indexes
sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.wSize() + ")."); sb.getLog().logInfo("Rejecting RWIs from peer " + otherPeerName + ". We are too busy (wordcachesize=" + sb.wordIndex.dhtOutCacheSize() + ").");
granted = false; // don't accept more words if there are too many words to flush granted = false; // don't accept more words if there are too many words to flush
result = "busy"; result = "busy";
pause = 300000; pause = 300000;
@ -165,8 +165,8 @@ public final class transferRWI {
iEntry = new indexURLEntry(estring.substring(p)); iEntry = new indexURLEntry(estring.substring(p));
urlHash = iEntry.urlHash(); urlHash = iEntry.urlHash();
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(plasmaURLPattern.BLACKLIST_DHT, urlHash))) { if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(plasmaURLPattern.BLACKLIST_DHT, urlHash))) {
//int deleted = sb.wordIndex.tryRemoveURLs(urlHash); int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs"); yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
blocked++; blocked++;
} else { } else {
sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true); sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true);
@ -208,7 +208,7 @@ public final class transferRWI {
result = "ok"; result = "ok";
if (checkLimit) { if (checkLimit) {
pause = (sb.wordIndex.kSize() < 500) ? 0 : 60 * sb.wordIndex.kSize(); // estimation of necessary pause time pause = (sb.wordIndex.dhtInCacheSize() < 500) ? 0 : 60 * sb.wordIndex.dhtInCacheSize(); // estimation of necessary pause time
} }
} }
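Note on the new pause estimation above: the requested pause grows linearly with the receiver's dhtIn backlog. Below 500 cached words no pause is requested; at 1000 cached words, for example, the sending peer is asked to wait 60 * 1000 = 60000 ms, i.e. one minute, before the next transfer.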

View File

@ -101,7 +101,7 @@ public final class transferURL {
if ((lEntry != null) && (lEntry.url() != null)) { if ((lEntry != null) && (lEntry.url() != null)) {
if ((blockBlacklist) && if ((blockBlacklist) &&
(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, lEntry.hash(), lEntry.url()))) { (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, lEntry.hash(), lEntry.url()))) {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash()); int deleted = 0; //sb.wordIndex.tryRemoveURLs(lEntry.hash()); // temporary disabled
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs"); yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null; lEntry = null;
blocked++; blocked++;

View File

@ -45,21 +45,18 @@ import de.anomic.yacy.yacySeedDB;
public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
// environment constants // environment constants
private static final String indexArrayFileName = "indexDump1.array";
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final long kCacheMaxAge = 1000 * 60 * 2; // milliseconds; 2 minutes
// class variables // class variables
private final File databaseRoot; private final File databaseRoot;
protected final TreeMap wCache; // wordhash-container protected final TreeMap cache; // wordhash-container
private final TreeMap kCache; // time-container; for karenz/DHT caching (set with high priority)
private final kelondroMScoreCluster hashScore; private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate; private final kelondroMScoreCluster hashDate;
private long kCacheInc = 0;
private long initTime; private long initTime;
private int wCacheMaxCount; private int cacheMaxCount;
public int wCacheReferenceLimit; public int cacheReferenceLimit;
private final serverLog log; private final serverLog log;
private String indexArrayFileName;
// calculated constants // calculated constants
private static String maxKey; private static String maxKey;
@ -68,20 +65,19 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-'; //minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
} }
public indexRAMCacheRI(File databaseRoot, int wCacheReferenceLimitInit, serverLog log) { public indexRAMCacheRI(File databaseRoot, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
// creates a new index cache // creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed // the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot; this.databaseRoot = databaseRoot;
this.wCache = new TreeMap(); this.cache = new TreeMap();
this.kCache = new TreeMap();
this.hashScore = new kelondroMScoreCluster(); this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster(); this.hashDate = new kelondroMScoreCluster();
this.kCacheInc = 0;
this.initTime = System.currentTimeMillis(); this.initTime = System.currentTimeMillis();
this.wCacheMaxCount = 10000; this.cacheMaxCount = 10000;
this.wCacheReferenceLimit = wCacheReferenceLimitInit; this.cacheReferenceLimit = wCacheReferenceLimitInit;
this.log = log; this.log = log;
indexArrayFileName = dumpname;
// read in dump of last session // read in dump of last session
try { try {
@ -92,7 +88,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
private void dump(int waitingSeconds) throws IOException { private void dump(int waitingSeconds) throws IOException {
log.logConfig("creating dump for index cache, " + wCache.size() + " words (and much more urls)"); log.logConfig("creating dump for index cache '" + indexArrayFileName + "', " + cache.size() + " words (and much more urls)");
File indexDumpFile = new File(databaseRoot, indexArrayFileName); File indexDumpFile = new File(databaseRoot, indexArrayFileName);
if (indexDumpFile.exists()) indexDumpFile.delete(); if (indexDumpFile.exists()) indexDumpFile.delete();
kelondroFixedWidthArray dumpArray = null; kelondroFixedWidthArray dumpArray = null;
@ -106,35 +102,10 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
long updateTime; long updateTime;
indexEntry iEntry; indexEntry iEntry;
kelondroRow.Entry row = dumpArray.row().newEntry(); kelondroRow.Entry row = dumpArray.row().newEntry();
// write kCache, this will be melted with the wCache upon load
synchronized (kCache) {
Iterator i = kCache.values().iterator();
while (i.hasNext()) {
container = (indexContainer) i.next();
// put entries on stack
if (container != null) {
Iterator ci = container.entries();
while (ci.hasNext()) {
iEntry = (indexEntry) ci.next();
row.setCol(0, container.getWordHash().getBytes());
row.setCol(1, container.size());
row.setCol(2, container.updated());
row.setCol(3, iEntry.urlHash().getBytes());
row.setCol(4, iEntry.toEncodedByteArrayForm(false));
dumpArray.set((int) urlcount++, row);
}
}
wordcount++;
i.remove(); // free some mem
}
}
// write wCache // write wCache
synchronized (wCache) { synchronized (cache) {
Iterator i = wCache.entrySet().iterator(); Iterator i = cache.entrySet().iterator();
while (i.hasNext()) { while (i.hasNext()) {
// get entries // get entries
entry = (Map.Entry) i.next(); entry = (Map.Entry) i.next();
@ -162,7 +133,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
if (System.currentTimeMillis() > messageTime) { if (System.currentTimeMillis() > messageTime) {
// System.gc(); // for better statistic // System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime); wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("dumping status: " + wordcount + " words done, " + (wCache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB"); log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000; messageTime = System.currentTimeMillis() + 5000;
} }
} }
@ -176,12 +147,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
File indexDumpFile = new File(databaseRoot, indexArrayFileName); File indexDumpFile = new File(databaseRoot, indexArrayFileName);
if (!(indexDumpFile.exists())) return 0; if (!(indexDumpFile.exists())) return 0;
kelondroFixedWidthArray dumpArray = new kelondroFixedWidthArray(indexDumpFile, plasmaWordIndexAssortment.bufferStructureBasis, 0); kelondroFixedWidthArray dumpArray = new kelondroFixedWidthArray(indexDumpFile, plasmaWordIndexAssortment.bufferStructureBasis, 0);
log.logConfig("restore array dump of index cache, " + dumpArray.size() + " word/URL relations"); log.logConfig("restore array dump of index cache '" + indexArrayFileName + "', " + dumpArray.size() + " word/URL relations");
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
long messageTime = System.currentTimeMillis() + 5000; long messageTime = System.currentTimeMillis() + 5000;
long urlCount = 0, urlsPerSecond = 0; long urlCount = 0, urlsPerSecond = 0;
try { try {
synchronized (wCache) { synchronized (cache) {
int i = dumpArray.size(); int i = dumpArray.size();
String wordHash; String wordHash;
//long creationTime; //long creationTime;
@ -211,7 +182,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
dumpArray.close(); dumpArray.close();
log.logConfig("restored " + wCache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds"); log.logConfig("restored " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
} catch (kelondroException e) { } catch (kelondroException e) {
// restore failed // restore failed
log.logSevere("restore of indexCache array dump failed: " + e.getMessage(), e); log.logSevere("restore of indexCache array dump failed: " + e.getMessage(), e);
@ -223,54 +194,36 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
// cache settings // cache settings
public int maxURLinWCache() { public int maxURLinCache() {
if (hashScore.size() == 0) return 0; if (hashScore.size() == 0) return 0;
return hashScore.getMaxScore(); return hashScore.getMaxScore();
} }
public long minAgeOfWCache() { public long minAgeOfCache() {
if (hashDate.size() == 0) return 0; if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMaxScore()); return System.currentTimeMillis() - longEmit(hashDate.getMaxScore());
} }
public long maxAgeOfWCache() { public long maxAgeOfCache() {
if (hashDate.size() == 0) return 0; if (hashDate.size() == 0) return 0;
return System.currentTimeMillis() - longEmit(hashDate.getMinScore()); return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
} }
public long minAgeOfKCache() {
if (kCache.size() == 0) return 0;
return System.currentTimeMillis() - ((Long) kCache.lastKey()).longValue();
}
public long maxAgeOfKCache() {
if (kCache.size() == 0) return 0;
return System.currentTimeMillis() - ((Long) kCache.firstKey()).longValue();
}
public void setMaxWordCount(int maxWords) { public void setMaxWordCount(int maxWords) {
this.wCacheMaxCount = maxWords; this.cacheMaxCount = maxWords;
} }
public int getMaxWordCount() { public int getMaxWordCount() {
return this.wCacheMaxCount; return this.cacheMaxCount;
} }
public int wSize() {
return wCache.size();
}
public int kSize() {
return kCache.size();
}
public int size() { public int size() {
return wCache.size() + kCache.size(); return cache.size();
} }
public int indexSize(String wordHash) { public int indexSize(String wordHash) {
int size = 0; int size = 0;
indexContainer cacheIndex = (indexContainer) wCache.get(wordHash); indexContainer cacheIndex = (indexContainer) cache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size(); if (cacheIndex != null) size += cacheIndex.size();
return size; return size;
} }
@ -294,7 +247,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public wordContainerIterator(String startWordHash, boolean rot) { public wordContainerIterator(String startWordHash, boolean rot) {
this.rot = rot; this.rot = rot;
this.iterator = (startWordHash == null) ? wCache.values().iterator() : wCache.tailMap(startWordHash).values().iterator(); this.iterator = (startWordHash == null) ? cache.values().iterator() : cache.tailMap(startWordHash).values().iterator();
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree. // The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
} }
@ -309,7 +262,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} else { } else {
// rotation iteration // rotation iteration
if (rot) { if (rot) {
iterator = wCache.values().iterator(); iterator = cache.values().iterator();
return ((indexContainer) iterator.next()).topLevelClone(); return ((indexContainer) iterator.next()).topLevelClone();
} else { } else {
return null; return null;
@ -322,35 +275,18 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
} }
public void shiftK2W() {
// find entries in kCache that are too old for that place and shift them to the wCache
long time;
Long l;
indexContainer container;
synchronized (kCache) {
while (kCache.size() > 0) {
l = (Long) kCache.firstKey();
time = l.longValue();
if (System.currentTimeMillis() - time < kCacheMaxAge) return;
container = (indexContainer) kCache.remove(l);
addEntries(container, container.updated(), false);
}
}
}
public String bestFlushWordHash() { public String bestFlushWordHash() {
// select appropriate hash // select appropriate hash
// we have 2 different methods to find a good hash: // we have 2 different methods to find a good hash:
// - the oldest entry in the cache // - the oldest entry in the cache
// - the entry with maximum count // - the entry with maximum count
shiftK2W(); if (cache.size() == 0) return null;
if (wCache.size() == 0) return null;
try { try {
synchronized (wCache) { synchronized (cache) {
String hash = null; String hash = null;
int count = hashScore.getMaxScore(); int count = hashScore.getMaxScore();
if ((count >= wCacheReferenceLimit) && if ((count >= cacheReferenceLimit) &&
((hash = (String) hashScore.getMaxObject()) != null)) { ((hash = (String) hashScore.getMaxObject()) != null)) {
// we MUST flush high-score entries, because a loop deletes entries in cache until this condition fails // we MUST flush high-score entries, because a loop deletes entries in cache until this condition fails
// in this cache we MUST NOT check wCacheMinAge // in this cache we MUST NOT check wCacheMinAge
@ -363,7 +299,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
return hash; return hash;
} }
// cases with respect to memory situation // cases with respect to memory situation
if (Runtime.getRuntime().freeMemory() < 1000000) { if (Runtime.getRuntime().freeMemory() < 100000) {
// urgent low-memory case // urgent low-memory case
hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM) hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
} else { } else {
@ -387,22 +323,26 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) { public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
if (urlselection == null) {
return (indexContainer) wCache.get(wordHash); // retrieve container
} else { indexContainer container = (indexContainer) cache.get(wordHash);
indexContainer ic = (indexContainer) wCache.get(wordHash);
if (ic != null) { // We must not use the container from cache to store everything we find,
ic = ic.topLevelClone(); // as that container remains linked to in the cache and might be changed later
ic.select(urlselection); // while the returned container is still in use.
} // create a clone from the container
return ic; if (container != null) container = container.topLevelClone();
}
// select the urlselection
if ((urlselection != null) && (container != null)) container.select(urlselection);
return container;
} }
public indexContainer deleteContainer(String wordHash) { public indexContainer deleteContainer(String wordHash) {
// returns the index that had been deleted // returns the index that had been deleted
synchronized (wCache) { synchronized (cache) {
indexContainer container = (indexContainer) wCache.remove(wordHash); indexContainer container = (indexContainer) cache.remove(wordHash);
hashScore.deleteScore(wordHash); hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash); hashDate.deleteScore(wordHash);
return container; return container;
@ -410,7 +350,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (wCache) { synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash); indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) { if (c != null) {
if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true; if (c.removeEntry(wordHash, urlHash, deleteComplete)) return true;
@ -423,7 +363,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
if (urlHashes.size() == 0) return 0; if (urlHashes.size() == 0) return 0;
int count = 0; int count = 0;
synchronized (wCache) { synchronized (cache) {
indexContainer c = (indexContainer) deleteContainer(wordHash); indexContainer c = (indexContainer) deleteContainer(wordHash);
if (c != null) { if (c != null) {
count = c.removeEntries(wordHash, urlHashes, deleteComplete); count = c.removeEntries(wordHash, urlHashes, deleteComplete);
@ -432,14 +372,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
return count; return count;
} }
public int tryRemoveURLs(String urlHash) { public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this // this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh // urlHash assigned. This can only work if the entry is really fresh
// Such entries must be searched in the latest entries // Such entries must be searched in the latest entries
int delCount = 0; int delCount = 0;
synchronized (kCache) { synchronized (cache) {
Iterator i = kCache.entrySet().iterator(); Iterator i = cache.entrySet().iterator();
Map.Entry entry; Map.Entry entry;
Long l; Long l;
indexContainer c; indexContainer c;
@ -453,7 +393,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
if (c.size() == 0) { if (c.size() == 0) {
i.remove(); i.remove();
} else { } else {
kCache.put(l, c); // superfluous? cache.put(l, c); // superfluous?
} }
delCount++; delCount++;
} }
@ -467,20 +407,14 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
int added = 0; int added = 0;
// put new words into cache // put new words into cache
if (dhtCase) synchronized (kCache) { synchronized (cache) {
// put container into kCache
kCache.put(new Long(updateTime + kCacheInc), container);
kCacheInc++;
if (kCacheInc > 10000) kCacheInc = 0;
added = container.size();
} else synchronized (wCache) {
// put container into wCache // put container into wCache
String wordHash = container.getWordHash(); String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) wCache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexContainer(wordHash); if (entries == null) entries = new indexContainer(wordHash);
added = entries.add(container, -1); added = entries.add(container, -1);
if (added > 0) { if (added > 0) {
wCache.put(wordHash, entries); cache.put(wordHash, entries);
hashScore.addScore(wordHash, added); hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime)); hashDate.setScore(wordHash, intTime(updateTime));
} }
@ -490,20 +424,12 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
} }
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) { public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
if (dhtCase) synchronized (kCache) { synchronized (cache) {
// put container into kCache indexContainer container = (indexContainer) cache.get(wordHash);
indexContainer container = new indexContainer(wordHash);
container.add(newEntry);
kCache.put(new Long(updateTime + kCacheInc), container);
kCacheInc++;
if (kCacheInc > 10000) kCacheInc = 0;
return null;
} else synchronized (wCache) {
indexContainer container = (indexContainer) wCache.get(wordHash);
if (container == null) container = new indexContainer(wordHash); if (container == null) container = new indexContainer(wordHash);
indexEntry[] entries = new indexEntry[] { newEntry }; indexEntry[] entries = new indexEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) { if (container.add(entries, updateTime) > 0) {
wCache.put(wordHash, container); cache.put(wordHash, container);
hashScore.incScore(wordHash); hashScore.incScore(wordHash);
hashDate.setScore(wordHash, intTime(updateTime)); hashDate.setScore(wordHash, intTime(updateTime));
return null; return null;
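The rationale stated in the new getContainer comments above deserves emphasis: the container held in the cache must never be handed out directly, since the cache keeps a reference to it and may change it while the caller still uses the result. The new code therefore clones first and applies the URL selection to the clone only; condensed from the diff:

    public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxtime_dummy) {
        // retrieve container
        indexContainer container = (indexContainer) cache.get(wordHash);
        // clone before returning: the cached instance remains linked to the cache
        // and might be changed later while the returned container is still in use
        if (container != null) container = container.topLevelClone();
        // apply the url selection to the clone only, never to the cached original
        if ((urlselection != null) && (container != null)) container.select(urlselection);
        return container;
    }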

View File

@ -475,13 +475,11 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
synchronized (rcContainers) { synchronized (rcContainers) {
String wordHash; String wordHash;
Iterator hashi = query.queryHashes.iterator(); Iterator hashi = query.queryHashes.iterator();
boolean dhtCache = false;
while (hashi.hasNext()) { while (hashi.hasNext()) {
wordHash = (String) hashi.next(); wordHash = (String) hashi.next();
rcContainers.setWordHash(wordHash); rcContainers.setWordHash(wordHash);
dhtCache = dhtCache | wordIndex.busyCacheFlush; wordIndex.addEntries(rcContainers, System.currentTimeMillis(), true);
wordIndex.addEntries(rcContainers, System.currentTimeMillis(), dhtCache); log.logFine("FLUSHED " + wordHash + ": " + rcContainers.size() + " url entries");
log.logFine("FLUSHED " + wordHash + ": " + rcContainers.size() + " url entries to " + ((dhtCache) ? "DHT cache" : "word cache"));
} }
// the rcGlobal was flushed, empty it // the rcGlobal was flushed, empty it
count += rcContainers.size(); count += rcContainers.size();

View File

@ -970,7 +970,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// flush some entries from the RAM cache // flush some entries from the RAM cache
wordIndex.flushCacheSome(false); wordIndex.flushCacheSome(false);
// adopt maximum cache size to current size to prevent that further OutOfMemoryErrors occur // adopt maximum cache size to current size to prevent that further OutOfMemoryErrors occur
int newMaxCount = Math.max(2000, Math.min((int) getConfigLong("wordCacheMaxCount", 20000), wordIndex.wSize())); int newMaxCount = Math.max(2000, Math.min((int) getConfigLong("wordCacheMaxCount", 20000), wordIndex.dhtOutCacheSize()));
setConfig("wordCacheMaxCount", Integer.toString(newMaxCount)); setConfig("wordCacheMaxCount", Integer.toString(newMaxCount));
wordIndex.setMaxWordCount(newMaxCount); wordIndex.setMaxWordCount(newMaxCount);
} }

View File

@ -75,6 +75,7 @@ import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder; import de.anomic.kelondro.kelondroOrder;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacyDHTAction;
public final class plasmaWordIndex extends indexAbstractRI implements indexRI { public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
@ -83,7 +84,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
private final File oldDatabaseRoot; private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true); private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
private final indexRAMCacheRI ramCache; private final indexRAMCacheRI dhtOutCache, dhtInCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private int assortmentBufferSize; // kb private int assortmentBufferSize; // kb
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
@ -95,7 +96,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) { public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) {
this.oldDatabaseRoot = oldDatabaseRoot; this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log); this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log);
this.ramCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, log); this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log);
this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log);
// create assortment cluster path // create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath); File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
@ -120,32 +122,36 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return oldDatabaseRoot; return oldDatabaseRoot;
} }
public int maxURLinWCache() { public int maxURLinDHTOutCache() {
return ramCache.maxURLinWCache(); return dhtOutCache.maxURLinCache();
} }
public long minAgeOfWCache() { public long minAgeOfDHTOutCache() {
return ramCache.minAgeOfWCache(); return dhtOutCache.minAgeOfCache();
} }
public long maxAgeOfWCache() { public long maxAgeOfDHTOutCache() {
return ramCache.maxAgeOfWCache(); return dhtOutCache.maxAgeOfCache();
} }
public long minAgeOfKCache() { public int maxURLinDHTInCache() {
return ramCache.minAgeOfKCache(); return dhtInCache.maxURLinCache();
} }
public long maxAgeOfKCache() { public long minAgeOfDHTInCache() {
return ramCache.maxAgeOfKCache(); return dhtInCache.minAgeOfCache();
} }
public int wSize() { public long maxAgeOfDHTInCache() {
return ramCache.wSize(); return dhtInCache.maxAgeOfCache();
} }
public int kSize() { public int dhtOutCacheSize() {
return ramCache.kSize(); return dhtOutCache.size();
}
public int dhtInCacheSize() {
return dhtInCache.size();
} }
public int[] assortmentsSizes() { public int[] assortmentsSizes() {
@ -169,7 +175,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} }
public void setMaxWordCount(int maxWords) { public void setMaxWordCount(int maxWords) {
ramCache.setMaxWordCount(maxWords); dhtOutCache.setMaxWordCount(maxWords);
} }
public void setWordFlushDivisor(int idleDivisor, int busyDivisor) { public void setWordFlushDivisor(int idleDivisor, int busyDivisor) {
@ -179,50 +185,69 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public void flushControl() { public void flushControl() {
// check for forced flush // check for forced flush
synchronized (this) { ramCache.shiftK2W(); } synchronized (this) {
flushCache(ramCache.maxURLinWCache() - ramCache.wCacheReferenceLimit); if (dhtOutCache.size() > dhtOutCache.getMaxWordCount()) {
if (ramCache.wSize() > ramCache.getMaxWordCount()) { flushCache(dhtOutCache, dhtOutCache.size() + 500 - dhtOutCache.getMaxWordCount());
flushCache(ramCache.wSize() + 500 - ramCache.getMaxWordCount()); }
if (dhtInCache.size() > dhtInCache.getMaxWordCount()) {
flushCache(dhtInCache, dhtInCache.size() + 500 - dhtInCache.getMaxWordCount());
}
} }
} }
public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) { public indexContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtInCase) {
indexContainer c; // set dhtInCase depending on wordHash
if ((c = ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) == null) { if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;
if (!dhtCase) flushControl();
return null; // add the entry
} if (dhtInCase) {
return c; dhtInCache.addEntry(wordHash, entry, updateTime, true);
} else {
dhtOutCache.addEntry(wordHash, entry, updateTime, false);
flushControl();
}
return null;
} }
public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtCase) { public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
indexContainer added = ramCache.addEntries(entries, updateTime, dhtCase); // set dhtInCase depending on wordHash
// force flush if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(entries.getWordHash()))) dhtInCase = true;
if (!dhtCase) flushControl();
return added; // add the entry
if (dhtInCase) {
dhtInCache.addEntries(entries, updateTime, true);
} else {
dhtOutCache.addEntries(entries, updateTime, false);
flushControl();
}
return null;
} }
public void flushCacheSome(boolean busy) { public void flushCacheSome(boolean busy) {
synchronized (this) { ramCache.shiftK2W(); } flushCacheSome(dhtOutCache, busy);
int flushCount = (busy) ? ramCache.wSize() / busyDivisor : ramCache.wSize() / idleDivisor; flushCacheSome(dhtInCache, busy);
if (flushCount > 100) flushCount = 100;
if (flushCount < 1) flushCount = Math.min(1, ramCache.wSize());
flushCache(flushCount);
} }
public void flushCache(int count) { private void flushCacheSome(indexRAMCacheRI ram, boolean busy) {
int flushCount = (busy) ? ram.size() / busyDivisor : ram.size() / idleDivisor;
if (flushCount > 100) flushCount = 100;
if (flushCount < 1) flushCount = Math.min(1, ram.size());
flushCache(ram, flushCount);
}
private void flushCache(indexRAMCacheRI ram, int count) {
if (count <= 0) return; if (count <= 0) return;
busyCacheFlush = true; busyCacheFlush = true;
String wordHash; String wordHash;
//System.out.println("DEBUG-Started flush of " + count + " entries from RAM to DB"); //System.out.println("DEBUG-Started flush of " + count + " entries from RAM to DB");
//long start = System.currentTimeMillis(); //long start = System.currentTimeMillis();
for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ? for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
if (ramCache.wSize() == 0) break; if (ram.size() == 0) break;
synchronized (this) { synchronized (this) {
wordHash = ramCache.bestFlushWordHash(); wordHash = ram.bestFlushWordHash();
// flush the wordHash // flush the wordHash
indexContainer c = ramCache.deleteContainer(wordHash); indexContainer c = ram.deleteContainer(wordHash);
if (c != null) { if (c != null) {
if (useCollectionIndex) { if (useCollectionIndex) {
indexContainer feedback = collections.addEntries(c, c.updated(), false); indexContainer feedback = collections.addEntries(c, c.updated(), false);
@ -325,13 +350,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
// get from cache // get from cache
indexContainer container = ramCache.getContainer(wordHash, urlselection, true, -1); indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
if (container == null) {
// We must not use the container from cache to store everything we find, container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
// as that container remains linked to in the cache and might be changed later } else {
// while the returned container is still in use. indexContainer ic = dhtInCache.getContainer(wordHash, urlselection, true, -1);
// create a clone from the container if (ic != null) container.add(ic, -1);
if (container != null) container = container.topLevelClone(); }
// get from collection index // get from collection index
if (useCollectionIndex) { if (useCollectionIndex) {
@ -393,10 +418,12 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
if (useCollectionIndex) if (useCollectionIndex)
return java.lang.Math.max(collections.size(), return java.lang.Math.max(collections.size(),
java.lang.Math.max(assortmentCluster.size(), java.lang.Math.max(assortmentCluster.size(),
java.lang.Math.max(backend.size(), ramCache.size()))); java.lang.Math.max(backend.size(),
java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()))));
else else
return java.lang.Math.max(assortmentCluster.size(), return java.lang.Math.max(assortmentCluster.size(),
java.lang.Math.max(backend.size(), ramCache.size())); java.lang.Math.max(backend.size(),
java.lang.Math.max(dhtInCache.size(), dhtOutCache.size())));
} }
public int indexSize(String wordHash) { public int indexSize(String wordHash) {
@ -410,13 +437,15 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} catch (IOException e) {} } catch (IOException e) {}
if (useCollectionIndex) size += collections.indexSize(wordHash); if (useCollectionIndex) size += collections.indexSize(wordHash);
size += assortmentCluster.indexSize(wordHash); size += assortmentCluster.indexSize(wordHash);
size += ramCache.indexSize(wordHash); size += dhtInCache.indexSize(wordHash);
size += dhtOutCache.indexSize(wordHash);
return size; return size;
} }
public void close(int waitingBoundSeconds) { public void close(int waitingBoundSeconds) {
synchronized (this) { synchronized (this) {
ramCache.close(waitingBoundSeconds); dhtInCache.close(waitingBoundSeconds);
dhtOutCache.close(waitingBoundSeconds);
if (useCollectionIndex) collections.close(-1); if (useCollectionIndex) collections.close(-1);
assortmentCluster.close(-1); assortmentCluster.close(-1);
backend.close(10); backend.close(10);
@ -424,8 +453,9 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} }
public indexContainer deleteContainer(String wordHash) { public indexContainer deleteContainer(String wordHash) {
indexContainer c = ramCache.deleteContainer(wordHash); indexContainer c = new indexContainer(wordHash);
if (c == null) c = new indexContainer(wordHash); c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1); if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1);
c.add(assortmentCluster.deleteContainer(wordHash), -1); c.add(assortmentCluster.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1); c.add(backend.deleteContainer(wordHash), -1);
@ -433,7 +463,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} }
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) { public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
if (ramCache.removeEntry(wordHash, urlHash, deleteComplete)) return true; if (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
if (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
if (useCollectionIndex) {if (collections.removeEntry(wordHash, urlHash, deleteComplete)) return true;} if (useCollectionIndex) {if (collections.removeEntry(wordHash, urlHash, deleteComplete)) return true;}
if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true; if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true;
return backend.removeEntry(wordHash, urlHash, deleteComplete); return backend.removeEntry(wordHash, urlHash, deleteComplete);
@ -441,7 +472,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) { public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
int removed = 0; int removed = 0;
removed += ramCache.removeEntries(wordHash, urlHashes, deleteComplete); removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed; if (removed == urlHashes.size()) return removed;
if (useCollectionIndex) { if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete); removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
@ -453,35 +485,35 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return removed; return removed;
} }
public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// and can be found in the RAM cache
// this returns the number of deletion that had been possible
return ramCache.tryRemoveURLs(urlHash);
}
public static final int RL_RAMCACHE = 0; public static final int RL_RAMCACHE = 0;
public static final int RL_COLLECTIONS = 1; // the new index structure public static final int RL_COLLECTIONS = 1; // the new index structure
public static final int RL_ASSORTMENTS = 2; // (to be) outdated structure public static final int RL_ASSORTMENTS = 2; // (to be) outdated structure
public static final int RL_WORDFILES = 3; // (to be) outdated structure public static final int RL_WORDFILES = 3; // (to be) outdated structure
public int tryRemoveURLs(String urlHash) {
// this tries to delete an index from the cache that has this
// urlHash assigned. This can only work if the entry is really fresh
// and can be found in the RAM cache
// this returns the number of deletion that had been possible
return dhtInCache.tryRemoveURLs(urlHash);
}
public TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) throws IOException { public TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) throws IOException {
// creates a set of indexContainers // creates a set of indexContainers
// this does not use the dhtInCache
kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone()); kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
containerOrder.rotate(startHash.getBytes()); containerOrder.rotate(startHash.getBytes());
TreeSet containers = new TreeSet(containerOrder); TreeSet containers = new TreeSet(containerOrder);
Iterator i = wordContainers(startHash, resourceLevel, rot); Iterator i = wordContainers(startHash, resourceLevel, rot);
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) count = Math.min(ramCache.wSize(), count); if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) count = Math.min(dhtOutCache.size(), count);
indexContainer container; indexContainer container;
while ((count > 0) && (i.hasNext())) { while ((count > 0) && (i.hasNext())) {
container = (indexContainer) i.next(); container = (indexContainer) i.next();
if ((container != null) && (container.size() > 0)) { if ((container != null) && (container.size() > 0)) {
containers.add(container); containers.add(container);
count--; count--;
}
} }
}
return containers; return containers;
} }
@ -501,11 +533,11 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
private Iterator wordContainers(String startWordHash, int resourceLevel) throws IOException { private Iterator wordContainers(String startWordHash, int resourceLevel) throws IOException {
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) { if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) {
return ramCache.wordContainers(startWordHash, false); return dhtOutCache.wordContainers(startWordHash, false);
} }
if ((resourceLevel == plasmaWordIndex.RL_COLLECTIONS) && (useCollectionIndex)) { if ((resourceLevel == plasmaWordIndex.RL_COLLECTIONS) && (useCollectionIndex)) {
return new kelondroMergeIterator( return new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false), dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false), collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder), new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,
@ -515,7 +547,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
if (useCollectionIndex) { if (useCollectionIndex) {
return new kelondroMergeIterator( return new kelondroMergeIterator(
new kelondroMergeIterator( new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false), dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false), collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder), new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,
@ -526,7 +558,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
true); true);
} else { } else {
return new kelondroMergeIterator( return new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false), dhtOutCache.wordContainers(startWordHash, false),
assortmentCluster.wordContainers(startWordHash, true, false), assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder), new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,
@ -538,7 +570,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
return new kelondroMergeIterator( return new kelondroMergeIterator(
new kelondroMergeIterator( new kelondroMergeIterator(
new kelondroMergeIterator( new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false), dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false), collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder), new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,
@ -554,7 +586,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
} else { } else {
return new kelondroMergeIterator( return new kelondroMergeIterator(
new kelondroMergeIterator( new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false), dhtOutCache.wordContainers(startWordHash, false),
assortmentCluster.wordContainers(startWordHash, true, false), assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder), new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod, indexContainer.containerMergeMethod,

View File

@ -231,7 +231,12 @@ public class yacyDHTAction implements yacyPeerAction {
public void processPeerPing(yacySeed peer) { public void processPeerPing(yacySeed peer) {
} }
public static boolean shallBeOwnWord(String wordhash) {
final double distance = dhtDistance(yacyCore.seedDB.mySeed.hash, wordhash);
final double max = 1.2 / yacyCore.seedDB.sizeConnected();
//System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max);
return (distance > 0) && (distance <= max);
}
public static double dhtDistance(String peer, String word) { public static double dhtDistance(String peer, String word) {
// the dht distance is a positive value between 0 and 1 // the dht distance is a positive value between 0 and 1
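Worked example for the new shallBeOwnWord above: with 100 connected peers the threshold is max = 1.2 / 100 = 0.012, so the peer claims every word whose hash lies at a DHT distance greater than 0 and at most 0.012 from its own hash. The factor 1.2 (rather than 1.0, which would give an exactly even share) presumably makes neighboring responsibility regions overlap slightly instead of leaving coverage gaps.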