better memory control:

- added a memory monitor for the preNURL db in PerformanceMemory_p
- changed default memory assignments

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2427 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter committed on 2006-08-19 13:09:04 +00:00
commit 1ce3c22761, parent 39b4c26bdc
5 changed files with 70 additions and 19 deletions


@@ -161,6 +161,24 @@ Increasing this cache will be most important for a fast proxy mode.</td>
This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer.</td>
</tr>
+<tr class="TableCellDark">
+<td class="small" align="left">'pre-noticed' URLs</td>
+<td class="small" align="right">#[slreqPreNURL]#</td>
+<td class="small" align="right">#[chunkPreNURL]#</td>
+<td class="small" align="right">#[slempPreNURL]#</td>
+<td class="small" align="right">#[slfilPreNURL]#</td>
+<td class="small" align="right">#[slhittmissPreNURL]#<br>#[sluniqdoubPreNURL]#<br>#[slflushPreNURL]#</td>
+<td class="small" align="right">#[ochunkmaxPreNURL]#</td>
+<td class="small" align="right">#[ochunkcurPreNURL]#</td>
+<td class="small" align="right">#[ohittmissPreNURL]#<br>#[ouniqdoubPreNURL]#<br>#[oflushPreNURL]#</td>
+<td class="small" align="right">#[nhittmissPreNURL]#<br>#[nuniqdoubPreNURL]#<br>#[nflushPreNURL]#</td>
+<td class="small" align="right">#[usedPreNURL]#</td>
+<td class="small" align="right"><input name="ramCachePreNURL" type="text" align="right" size="6" maxlength="7" value="#[ramCachePreNURL]#"></td>
+<td class="small" align="right">#[dfltPreNURL]#</td>
+<td class="small" align="right">#[bestPreNURL]#</td>
+<td class="small" align="left"></td>
+</tr>
<tr class="TableCellDark">
<td class="small" align="left">'noticed' URLs</td>
<td class="small" align="right">#[slreqNURL]#</td>
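The #[name]# markers in the new row are slots in YaCy's servlet template system: PerformanceMemory_p.java measures the cache state, stores the numbers in a property map under these names, and the template engine substitutes them into the HTML when the page is served. A toy filler showing the convention (a sketch only, not YaCy's actual template engine):

import java.util.HashMap;
import java.util.Map;

// Toy illustration of the #[name]# substitution convention used by the
// table above; YaCy's real template engine lives in its server classes.
public class TemplateFillSketch {
    static String fill(String template, Map<String, String> prop) {
        for (Map.Entry<String, String> e : prop.entrySet()) {
            template = template.replace("#[" + e.getKey() + "]#", e.getValue());
        }
        return template;
    }

    public static void main(String[] args) {
        Map<String, String> prop = new HashMap<String, String>();
        prop.put("slreqPreNURL", "1024");  // slots requested from the node cache
        prop.put("chunkPreNURL", "256");   // size of one cache node, in bytes
        String row = "<td>#[slreqPreNURL]#</td><td>#[chunkPreNURL]#</td>";
        System.out.println(fill(row, prop)); // <td>1024</td><td>256</td>
    }
}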


@@ -93,6 +93,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB);
env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB);
env.setConfig("ramCacheProfiles", Long.parseLong(post.get("ramCacheProfiles", "0")) * KB);
env.setConfig("ramCachePreNURL", Long.parseLong(post.get("ramCachePreNURL", "0")) * KB);
}
if (post.containsKey("setDefault")) {
env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI")));
@@ -107,6 +108,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews")));
env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots")));
env.setConfig("ramCacheProfiles", Long.parseLong((String) defaultSettings.get("ramCacheProfiles")));
env.setConfig("ramCachePreNURL", Long.parseLong((String) defaultSettings.get("ramCachePreNURL")));
}
if (post.containsKey("setGood")) set = "setGood";
if (post.containsKey("setBest")) set = "setBest";
@@ -182,6 +184,12 @@ public class PerformanceMemory_p {
ost = sb.urlPool.loadedURL.cacheObjectStatus();
putprop(prop, env, "LURL", set);
+req = sb.sbStackCrawlThread.size();
+chk = sb.sbStackCrawlThread.cacheNodeChunkSize();
+slt = sb.sbStackCrawlThread.cacheNodeStatus();
+ost = sb.sbStackCrawlThread.cacheObjectStatus();
+putprop(prop, env, "PreNURL", set);
req = sb.urlPool.noticeURL.size();
chk = sb.urlPool.noticeURL.cacheNodeChunkSize();
slt = sb.urlPool.noticeURL.cacheNodeStatus();
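The ramCache form fields are entered in kilobytes and multiplied by KB before being stored, so the config file keeps raw byte counts. A minimal sketch of the conversion, assuming KB = 1024 (the constant's value is an assumption; check PerformanceMemory_p.java):

// Sketch of the KB-to-bytes round trip performed above, assuming KB = 1024.
public class RamCacheConversionSketch {
    static final long KB = 1024;

    public static void main(String[] args) {
        String posted = "1024";                   // form value, in KB
        long bytes = Long.parseLong(posted) * KB; // value stored in the config
        System.out.println(bytes);                // 1048576, the new
                                                  // ramCachePreNURL default
    }
}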


@@ -62,7 +62,6 @@ import de.anomic.http.httpc;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
@@ -152,6 +151,18 @@ public final class plasmaCrawlStacker {
public int size() {
return this.queue.size();
}
+public int cacheNodeChunkSize() {
+return this.queue.cacheNodeChunkSize();
+}
+public int[] cacheNodeStatus() {
+return this.queue.cacheNodeStatus();
+}
+public String[] cacheObjectStatus() {
+return this.queue.cacheObjectStatus();
+}
public void job() {
try {
@@ -563,7 +574,7 @@ public final class plasmaCrawlStacker {
private final serverSemaphore readSync;
private final serverSemaphore writeSync;
private final LinkedList urlEntryHashCache;
-private kelondroIndex urlEntryCache;
+private kelondroTree urlEntryCache;
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) {
// init the read semaphore
@@ -626,6 +637,18 @@
}
}
+public int cacheNodeChunkSize() {
+return urlEntryCache.cacheNodeChunkSize();
+}
+public int[] cacheNodeStatus() {
+return urlEntryCache.cacheNodeStatus();
+}
+public String[] cacheObjectStatus() {
+return urlEntryCache.cacheObjectStatus();
+}
public void close() throws IOException {
// closing the db
this.urlEntryCache.close();
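Two changes here: the queue's field is narrowed from the kelondroIndex interface to the concrete kelondroTree, since only the tree exposes the cache-status accessors, and both stackCrawlQueue and plasmaCrawlStacker forward those accessors so PerformanceMemory_p can reach them through sb.sbStackCrawlThread. A self-contained sketch of that delegation chain, using illustrative stand-in classes rather than the actual YaCy types:

// Stand-ins (not the YaCy classes) for the chain
// PerformanceMemory_p -> plasmaCrawlStacker -> stackCrawlQueue -> kelondroTree.
interface CacheMonitor {
    int cacheNodeChunkSize();      // size of one cached node, in bytes
    int[] cacheNodeStatus();       // node cache counters
    String[] cacheObjectStatus();  // object cache summary
}

public class DelegationSketch {
    static class Backing implements CacheMonitor {   // plays kelondroTree
        public int cacheNodeChunkSize() { return 256; }
        public int[] cacheNodeStatus()  { return new int[] { 1024, 512, 512 }; }
        public String[] cacheObjectStatus() { return new String[] { "hit=512", "miss=512" }; }
    }

    static class Queue implements CacheMonitor {     // plays stackCrawlQueue
        private final CacheMonitor db = new Backing();
        public int cacheNodeChunkSize() { return db.cacheNodeChunkSize(); }
        public int[] cacheNodeStatus()  { return db.cacheNodeStatus(); }
        public String[] cacheObjectStatus() { return db.cacheObjectStatus(); }
    }

    public static void main(String[] args) {
        System.out.println(new Queue().cacheNodeChunkSize()); // 256
    }
}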


@@ -406,10 +406,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start indexing management
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time);
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
-int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
-wordIndex.setMaxWordCount(wordCacheMaxCount);
+// set a high maximum cache size to current size; this is adopted later automatically
+int wordCacheMaxCount = Math.max(20000, (int) getConfigLong("wordCacheMaxCount", 20000));
+setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount));
+wordIndex.setMaxWordCount(wordCacheMaxCount);
// start a cache manager
log.logConfig("Starting HT Cache Manager");


@@ -464,24 +464,24 @@ xpstopw=true
# and another idlesleep is performed
20_dhtdistribution_idlesleep=30000
20_dhtdistribution_busysleep=10000
-20_dhtdistribution_memprereq=8388608
+20_dhtdistribution_memprereq=4194304
30_peerping_idlesleep=120000
30_peerping_busysleep=120000
30_peerping_memprereq=1048576
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
-40_peerseedcycle_memprereq=4194304
+40_peerseedcycle_memprereq=2097152
50_localcrawl_idlesleep=10000
50_localcrawl_busysleep=300
-50_localcrawl_memprereq=1048576
+50_localcrawl_memprereq=2097152
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
61_globalcrawltrigger_busysleep=500
-61_globalcrawltrigger_memprereq=1048576
+61_globalcrawltrigger_memprereq=2097152
61_globalcrawltrigger_isPaused=false
62_remotetriggeredcrawl_idlesleep=10000
62_remotetriggeredcrawl_busysleep=1000
-62_remotetriggeredcrawl_memprereq=1048576
+62_remotetriggeredcrawl_memprereq=4194304
62_remotetriggeredcrawl_isPaused=false
70_cachemanager_idlesleep=5000
70_cachemanager_busysleep=0
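The memprereq and ramCache values in this file are raw byte counts; the defaults are powers-of-two megabytes, e.g. 4194304 = 4 × 1024 × 1024. A quick reference for the figures used here:

// The memory figures in the config are plain byte counts:
public class MemSizes {
    static final long MB = 1024L * 1024L;

    public static void main(String[] args) {
        System.out.println(1 * MB); // 1048576
        System.out.println(2 * MB); // 2097152
        System.out.println(4 * MB); // 4194304
        System.out.println(8 * MB); // 8388608
    }
}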
@@ -508,7 +508,7 @@ xpstopw=true
# ram cache for database files
# ram cache for assortment cache cluster (for all 64 files)
-ramCacheRWI = 8388608
+ramCacheRWI = 4194304
ramCacheRWI_time = 30000
# ram cache for responseHeader.db
@@ -516,11 +516,15 @@ ramCacheHTTP = 1048576
ramCacheHTTP_time = 1000
# ram cache for urlHash.db
-ramCacheLURL = 8388608
+ramCacheLURL = 4194304
ramCacheLURL_time = 10000
+# ram cache for stack crawl thread db
+ramCachePreNURL = 1048576
+ramCachePreNURL_time = 3000
# ram cache for urlNotice.db
-ramCacheNURL = 4194304
+ramCacheNURL = 1048576
ramCacheNURL_time = 2000
# ram cache for urlErr.db
@@ -548,17 +552,13 @@ ramCacheNews = 1048576
ramCacheNews_time = 1000
# ram cache for robotsTxt.db
-ramCacheRobots = 4194304
+ramCacheRobots = 2097152
ramCacheRobots_time = 3000
# ram cache for crawlProfile.db
ramCacheProfiles = 8192
ramCacheProfiles_time= 500
-# ram cache for stack crawl thread db
-ramCachePreNURL = 4194304
-ramCachePreNURL_time = 3000
# default memory settings for startup of yacy
# is only valid in unix/shell environments and
# not for first startup of YaCy
@@ -577,7 +577,7 @@ javastart_Xms=Xms10m
# remote index transmissions and search requests
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
-wordCacheMaxCount = 12000
+wordCacheMaxCount = 20000
# Specifies if yacy can be used as transparent http proxy.
#