diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index a40874cd2..3b96369e6 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -161,6 +161,24 @@ Increasing this cache will be most important for a fast proxy mode. This cache is very important for a fast search process. Increasing the cache size will result in more search results and less IO during DHT transfer. + +'pre-noticed' URLs +#[slreqPreNURL]# +#[chunkPreNURL]# +#[slempPreNURL]# +#[slfilPreNURL]# +#[slhittmissPreNURL]#
#[sluniqdoubPreNURL]#
#[slflushPreNURL]# +#[ochunkmaxPreNURL]# +#[ochunkcurPreNURL]# +#[ohittmissPreNURL]#
#[ouniqdoubPreNURL]#
#[oflushPreNURL]# +#[nhittmissPreNURL]#
#[nuniqdoubPreNURL]#
#[nflushPreNURL]# +#[usedPreNURL]# + +#[dfltPreNURL]# +#[bestPreNURL]# + + + 'noticed' URLs #[slreqNURL]# diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 35e4783b6..bac871914 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -93,6 +93,7 @@ public class PerformanceMemory_p { env.setConfig("ramCacheNews", Long.parseLong(post.get("ramCacheNews", "0")) * KB); env.setConfig("ramCacheRobots", Long.parseLong(post.get("ramCacheRobots", "0")) * KB); env.setConfig("ramCacheProfiles", Long.parseLong(post.get("ramCacheProfiles", "0")) * KB); + env.setConfig("ramCachePreNURL", Long.parseLong(post.get("ramCachePreNURL", "0")) * KB); } if (post.containsKey("setDefault")) { env.setConfig("ramCacheRWI", Long.parseLong((String) defaultSettings.get("ramCacheRWI"))); @@ -107,6 +108,7 @@ public class PerformanceMemory_p { env.setConfig("ramCacheNews", Long.parseLong((String) defaultSettings.get("ramCacheNews"))); env.setConfig("ramCacheRobots", Long.parseLong((String) defaultSettings.get("ramCacheRobots"))); env.setConfig("ramCacheProfiles", Long.parseLong((String) defaultSettings.get("ramCacheProfiles"))); + env.setConfig("ramCachePreNURL", Long.parseLong((String) defaultSettings.get("ramCachePreNURL"))); } if (post.containsKey("setGood")) set = "setGood"; if (post.containsKey("setBest")) set = "setBest"; @@ -182,6 +184,12 @@ public class PerformanceMemory_p { ost = sb.urlPool.loadedURL.cacheObjectStatus(); putprop(prop, env, "LURL", set); + req = sb.sbStackCrawlThread.size(); + chk = sb.sbStackCrawlThread.cacheNodeChunkSize(); + slt = sb.sbStackCrawlThread.cacheNodeStatus(); + ost = sb.sbStackCrawlThread.cacheObjectStatus(); + putprop(prop, env, "PreNURL", set); + req = sb.urlPool.noticeURL.size(); chk = sb.urlPool.noticeURL.cacheNodeChunkSize(); slt = sb.urlPool.noticeURL.cacheNodeStatus(); diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 37c17ace3..caa67bf88 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -62,7 +62,6 @@ import de.anomic.http.httpc; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroException; -import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroTree; import de.anomic.plasma.plasmaCrawlEURL; @@ -152,6 +151,18 @@ public final class plasmaCrawlStacker { public int size() { return this.queue.size(); } + + public int cacheNodeChunkSize() { + return this.queue.cacheNodeChunkSize(); + } + + public int[] cacheNodeStatus() { + return this.queue.cacheNodeStatus(); + } + + public String[] cacheObjectStatus() { + return this.queue.cacheObjectStatus(); + } public void job() { try { @@ -563,7 +574,7 @@ public final class plasmaCrawlStacker { private final serverSemaphore readSync; private final serverSemaphore writeSync; private final LinkedList urlEntryHashCache; - private kelondroIndex urlEntryCache; + private kelondroTree urlEntryCache; public stackCrawlQueue(File cacheStacksPath, int bufferkb, long preloadTime) { // init the read semaphore @@ -626,6 +637,18 @@ public final class plasmaCrawlStacker { } } + public int cacheNodeChunkSize() { + return urlEntryCache.cacheNodeChunkSize(); + } + + public int[] cacheNodeStatus() { + return urlEntryCache.cacheNodeStatus(); + } + + public String[] cacheObjectStatus() { + return urlEntryCache.cacheObjectStatus(); + } + public void close() throws IOException { // closing the db this.urlEntryCache.close(); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 66a74402f..db6006e0f 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -406,10 +406,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // start indexing management log.logConfig("Starting Indexing Management"); urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time); - wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false)); - int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000); - wordIndex.setMaxWordCount(wordCacheMaxCount); + + // set a high maximum cache size to current size; this is adopted later automatically + int wordCacheMaxCount = Math.max(20000, (int) getConfigLong("wordCacheMaxCount", 20000)); + setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount)); + wordIndex.setMaxWordCount(wordCacheMaxCount); // start a cache manager log.logConfig("Starting HT Cache Manager"); diff --git a/yacy.init b/yacy.init index 888468295..085ad7712 100644 --- a/yacy.init +++ b/yacy.init @@ -464,24 +464,24 @@ xpstopw=true # and another idlesleep is performed 20_dhtdistribution_idlesleep=30000 20_dhtdistribution_busysleep=10000 -20_dhtdistribution_memprereq=8388608 +20_dhtdistribution_memprereq=4194304 30_peerping_idlesleep=120000 30_peerping_busysleep=120000 30_peerping_memprereq=1048576 40_peerseedcycle_idlesleep=1800000 40_peerseedcycle_busysleep=1200000 -40_peerseedcycle_memprereq=4194304 +40_peerseedcycle_memprereq=2097152 50_localcrawl_idlesleep=10000 50_localcrawl_busysleep=300 -50_localcrawl_memprereq=1048576 +50_localcrawl_memprereq=2097152 50_localcrawl_isPaused=false 61_globalcrawltrigger_idlesleep=10000 61_globalcrawltrigger_busysleep=500 -61_globalcrawltrigger_memprereq=1048576 +61_globalcrawltrigger_memprereq=2097152 61_globalcrawltrigger_isPaused=false 62_remotetriggeredcrawl_idlesleep=10000 62_remotetriggeredcrawl_busysleep=1000 -62_remotetriggeredcrawl_memprereq=1048576 +62_remotetriggeredcrawl_memprereq=4194304 62_remotetriggeredcrawl_isPaused=false 70_cachemanager_idlesleep=5000 70_cachemanager_busysleep=0 @@ -508,7 +508,7 @@ xpstopw=true # ram cache for database files # ram cache for assortment cache cluster (for all 64 files) -ramCacheRWI = 8388608 +ramCacheRWI = 4194304 ramCacheRWI_time = 30000 # ram cache for responseHeader.db @@ -516,11 +516,15 @@ ramCacheHTTP = 1048576 ramCacheHTTP_time = 1000 # ram cache for urlHash.db -ramCacheLURL = 8388608 +ramCacheLURL = 4194304 ramCacheLURL_time = 10000 +# ram cache for stack crawl thread db +ramCachePreNURL = 1048576 +ramCachePreNURL_time = 3000 + # ram cache for urlNotice.db -ramCacheNURL = 4194304 +ramCacheNURL = 1048576 ramCacheNURL_time = 2000 # ram cache for urlErr.db @@ -548,17 +552,13 @@ ramCacheNews = 1048576 ramCacheNews_time = 1000 # ram cache for robotsTxt.db -ramCacheRobots = 4194304 +ramCacheRobots = 2097152 ramCacheRobots_time = 3000 # ram cache for crawlProfile.db ramCacheProfiles = 8192 ramCacheProfiles_time= 500 -# ram cache for stack crawl thread db -ramCachePreNURL = 4194304 -ramCachePreNURL_time = 3000 - # default memory settings for startup of yacy # is only valid in unix/shell environments and # not for first startup of YaCy @@ -577,7 +577,7 @@ javastart_Xms=Xms10m # remote index transmissions and search requests # maxWaitingWordFlush gives the number of seconds that the shutdown # may last for the word flush -wordCacheMaxCount = 12000 +wordCacheMaxCount = 20000 # Specifies if yacy can be used as transparent http proxy. #