better memory control:

- added a memory monitor for the preNURL db in PerformanceMemory_p
- changed default memory assignments

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2427 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter committed on 2006-08-19 13:09:04 +00:00
commit 1ce3c22761, parent 39b4c26bdc
5 changed files with 70 additions and 19 deletions


@@ -161,6 +161,24 @@ Increasing this cache will be most important for a fast proxy mode.</td>
This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer.</td>
</tr>
+<tr class="TableCellDark">
+<td class="small" align="left">'pre-noticed' URLs</td>
+<td class="small" align="right">#[slreqPreNURL]#</td>
+<td class="small" align="right">#[chunkPreNURL]#</td>
+<td class="small" align="right">#[slempPreNURL]#</td>
+<td class="small" align="right">#[slfilPreNURL]#</td>
+<td class="small" align="right">#[slhittmissPreNURL]#<br>#[sluniqdoubPreNURL]#<br>#[slflushPreNURL]#</td>
+<td class="small" align="right">#[ochunkmaxPreNURL]#</td>
+<td class="small" align="right">#[ochunkcurPreNURL]#</td>
+<td class="small" align="right">#[ohittmissPreNURL]#<br>#[ouniqdoubPreNURL]#<br>#[oflushPreNURL]#</td>
+<td class="small" align="right">#[nhittmissPreNURL]#<br>#[nuniqdoubPreNURL]#<br>#[nflushPreNURL]#</td>
+<td class="small" align="right">#[usedPreNURL]#</td>
+<td class="small" align="right"><input name="ramCachePreNURL" type="text" align="right" size="6" maxlength="7" value="#[ramCachePreNURL]#"></td>
+<td class="small" align="right">#[dfltPreNURL]#</td>
+<td class="small" align="right">#[bestPreNURL]#</td>
+<td class="small" align="left"></td>
+</tr>
<tr class="TableCellDark">
<td class="small" align="left">'noticed' URLs</td>
<td class="small" align="right">#[slreqNURL]#</td>
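The #[name]# markers in the new row are slots in YaCy's servlet template system: PerformanceMemory_p.java measures the cache state, stores the numbers in a property map under these names, and the template engine substitutes them into the HTML when the page is served. A toy filler showing the convention (a sketch only, not YaCy's actual template engine):

import java.util.HashMap;
import java.util.Map;

// Toy illustration of the #[name]# substitution convention used by the
// table above; YaCy's real template engine lives in its server classes.
public class TemplateFillSketch {
    static String fill(String template, Map<String, String> prop) {
        for (Map.Entry<String, String> e : prop.entrySet()) {
            template = template.replace("#[" + e.getKey() + "]#", e.getValue());
        }
        return template;
    }

    public static void main(String[] args) {
        Map<String, String> prop = new HashMap<String, String>();
        prop.put("slreqPreNURL", "1024");  // slots requested from the node cache
        prop.put("chunkPreNURL", "256");   // size of one cache node, in bytes
        String row = "<td>#[slreqPreNURL]#</td><td>#[chunkPreNURL]#</td>";
        System.out.println(fill(row, prop)); // <td>1024</td><td>256</td>
    }
}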


@@ -93,6 +93,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB);
env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB);
env.setConfig("ramCacheProfiles", Long.parseLong(post.get("ramCacheProfiles", "0")) * KB);
env.setConfig("ramCachePreNURL", Long.parseLong(post.get("ramCachePreNURL", "0")) * KB);
}
if (post.containsKey("setDefault")) {
env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI")));
@@ -107,6 +108,7 @@ public class PerformanceMemory_p {
env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews")));
env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots")));
env.setConfig("ramCacheProfiles", Long.parseLong((String) defaultSettings.get("ramCacheProfiles")));
env.setConfig("ramCachePreNURL", Long.parseLong((String) defaultSettings.get("ramCachePreNURL")));
}
if (post.containsKey("setGood")) set = "setGood";
if (post.containsKey("setBest")) set = "setBest";
@@ -182,6 +184,12 @@ public class PerformanceMemory_p {
ost = sb.urlPool.loadedURL.cacheObjectStatus();
putprop(prop, env, "LURL", set);
+req = sb.sbStackCrawlThread.size();
+chk = sb.sbStackCrawlThread.cacheNodeChunkSize();
+slt = sb.sbStackCrawlThread.cacheNodeStatus();
+ost = sb.sbStackCrawlThread.cacheObjectStatus();
+putprop(prop, env, "PreNURL", set);
req = sb.urlPool.noticeURL.size();
chk = sb.urlPool.noticeURL.cacheNodeChunkSize();
slt = sb.urlPool.noticeURL.cacheNodeStatus();
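The ramCache form fields are entered in kilobytes and multiplied by KB before being stored, so the config file keeps raw byte counts. A minimal sketch of the conversion, assuming KB = 1024 (the constant's value is an assumption; check PerformanceMemory_p.java):

// Sketch of the KB-to-bytes round trip performed above, assuming KB = 1024.
public class RamCacheConversionSketch {
    static final long KB = 1024;

    public static void main(String[] args) {
        String posted = "1024";                   // form value, in KB
        long bytes = Long.parseLong(posted) * KB; // value stored in the config
        System.out.println(bytes);                // 1048576, the new
                                                  // ramCachePreNURL default
    }
}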


@@ -62,7 +62,6 @@ import de.anomic.http.httpc;
import de.anomic.index.indexURL;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
-import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
@@ -152,6 +151,18 @@ public final class plasmaCrawlStacker {
public int size() {
return this.queue.size();
}
+public int cacheNodeChunkSize() {
+return this.queue.cacheNodeChunkSize();
+}
+public int[] cacheNodeStatus() {
+return this.queue.cacheNodeStatus();
+}
+public String[] cacheObjectStatus() {
+return this.queue.cacheObjectStatus();
+}
public void job() {
try {
@@ -563,7 +574,7 @@ public final class plasmaCrawlStacker {
private final serverSemaphore readSync;
private final serverSemaphore writeSync;
private final LinkedList urlEntryHashCache;
-private kelondroIndex urlEntryCache;
+private kelondroTree urlEntryCache;
public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) {
// init the read semaphore
@@ -626,6 +637,18 @@
}
}
+public int cacheNodeChunkSize() {
+return urlEntryCache.cacheNodeChunkSize();
+}
+public int[] cacheNodeStatus() {
+return urlEntryCache.cacheNodeStatus();
+}
+public String[] cacheObjectStatus() {
+return urlEntryCache.cacheObjectStatus();
+}
public void close() throws IOException {
// closing the db
this.urlEntryCache.close();
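Two changes here: the queue's field is narrowed from the kelondroIndex interface to the concrete kelondroTree, since only the tree exposes the cache-status accessors, and both stackCrawlQueue and plasmaCrawlStacker forward those accessors so PerformanceMemory_p can reach them through sb.sbStackCrawlThread. A self-contained sketch of that delegation chain, using illustrative stand-in classes rather than the actual YaCy types:

// Stand-ins (not the YaCy classes) for the chain
// PerformanceMemory_p -> plasmaCrawlStacker -> stackCrawlQueue -> kelondroTree.
interface CacheMonitor {
    int cacheNodeChunkSize();      // size of one cached node, in bytes
    int[] cacheNodeStatus();       // node cache counters
    String[] cacheObjectStatus();  // object cache summary
}

public class DelegationSketch {
    static class Backing implements CacheMonitor {   // plays kelondroTree
        public int cacheNodeChunkSize() { return 256; }
        public int[] cacheNodeStatus()  { return new int[] { 1024, 512, 512 }; }
        public String[] cacheObjectStatus() { return new String[] { "hit=512", "miss=512" }; }
    }

    static class Queue implements CacheMonitor {     // plays stackCrawlQueue
        private final CacheMonitor db = new Backing();
        public int cacheNodeChunkSize() { return db.cacheNodeChunkSize(); }
        public int[] cacheNodeStatus()  { return db.cacheNodeStatus(); }
        public String[] cacheObjectStatus() { return db.cacheObjectStatus(); }
    }

    public static void main(String[] args) {
        System.out.println(new Queue().cacheNodeChunkSize()); // 256
    }
}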


@@ -406,10 +406,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start indexing management
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time);
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
-int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
-wordIndex.setMaxWordCount(wordCacheMaxCount);
+// set a high maximum cache size to current size; this is adopted later automatically
+int wordCacheMaxCount = Math.max(20000, (int) getConfigLong("wordCacheMaxCount", 20000));
+setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount));
+wordIndex.setMaxWordCount(wordCacheMaxCount);
// start a cache manager
log.logConfig("Starting HT Cache Manager");


@@ -464,24 +464,24 @@ xpstopw=true
# and another idlesleep is performed
20_dhtdistribution_idlesleep=30000
20_dhtdistribution_busysleep=10000
-20_dhtdistribution_memprereq=8388608
+20_dhtdistribution_memprereq=4194304
30_peerping_idlesleep=120000
30_peerping_busysleep=120000
30_peerping_memprereq=1048576
40_peerseedcycle_idlesleep=1800000
40_peerseedcycle_busysleep=1200000
-40_peerseedcycle_memprereq=4194304
+40_peerseedcycle_memprereq=2097152
50_localcrawl_idlesleep=10000
50_localcrawl_busysleep=300
-50_localcrawl_memprereq=1048576
+50_localcrawl_memprereq=2097152
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
61_globalcrawltrigger_busysleep=500
-61_globalcrawltrigger_memprereq=1048576
+61_globalcrawltrigger_memprereq=2097152
61_globalcrawltrigger_isPaused=false
62_remotetriggeredcrawl_idlesleep=10000
62_remotetriggeredcrawl_busysleep=1000
-62_remotetriggeredcrawl_memprereq=1048576
+62_remotetriggeredcrawl_memprereq=4194304
62_remotetriggeredcrawl_isPaused=false
70_cachemanager_idlesleep=5000
70_cachemanager_busysleep=0
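The memprereq and ramCache values in this file are raw byte counts; the defaults are powers-of-two megabytes, e.g. 4194304 = 4 × 1024 × 1024. A quick reference for the figures used here:

// The memory figures in the config are plain byte counts:
public class MemSizes {
    static final long MB = 1024L * 1024L;

    public static void main(String[] args) {
        System.out.println(1 * MB); // 1048576
        System.out.println(2 * MB); // 2097152
        System.out.println(4 * MB); // 4194304
        System.out.println(8 * MB); // 8388608
    }
}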
@@ -508,7 +508,7 @@ xpstopw=true
# ram cache for database files
# ram cache for assortment cache cluster (for all 64 files)
-ramCacheRWI = 8388608
+ramCacheRWI = 4194304
ramCacheRWI_time = 30000
# ram cache for responseHeader.db
@@ -516,11 +516,15 @@ ramCacheHTTP = 1048576
ramCacheHTTP_time = 1000
# ram cache for urlHash.db
-ramCacheLURL = 8388608
+ramCacheLURL = 4194304
ramCacheLURL_time = 10000
+# ram cache for stack crawl thread db
+ramCachePreNURL = 1048576
+ramCachePreNURL_time = 3000
# ram cache for urlNotice.db
-ramCacheNURL = 4194304
+ramCacheNURL = 1048576
ramCacheNURL_time = 2000
# ram cache for urlErr.db
@@ -548,17 +552,13 @@ ramCacheNews = 1048576
ramCacheNews_time = 1000
# ram cache for robotsTxt.db
-ramCacheRobots = 4194304
+ramCacheRobots = 2097152
ramCacheRobots_time = 3000
# ram cache for crawlProfile.db
ramCacheProfiles = 8192
ramCacheProfiles_time= 500
-# ram cache for stack crawl thread db
-ramCachePreNURL = 4194304
-ramCachePreNURL_time = 3000
# default memory settings for startup of yacy
# is only valid in unix/shell environments and
# not for first startup of YaCy
@@ -577,7 +577,7 @@ javastart_Xms=Xms10m
# remote index transmissions and search requests
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
-wordCacheMaxCount = 12000
+wordCacheMaxCount = 20000
# Specifies if yacy can be used as transparent http proxy.
#