diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index d8f47e1bb..89b8bcbdd 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -204,11 +204,8 @@ public class PerformanceQueues_p { // getting the current crawler pool configuration int maxActive = Integer.parseInt(post.get("Crawler Pool_maxActive","8")); - // accept new crawler pool settings - plasmaSwitchboard.crawlSlots = maxActive; - // storing the new values into configfile - switchboard.setConfig("crawler.MaxActiveThreads",maxActive); + switchboard.setConfig(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX,maxActive); //switchboard.setConfig("crawler.MinIdleThreads",minIdle); /* diff --git a/htroot/Status.java b/htroot/Status.java index 5439ff389..2d9a584e4 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -328,14 +328,14 @@ public class Status { // Queue information int indexingJobCount = sb.getThread("80_indexing").getJobCount()+sb.indexingTasksInProcess.size(); - int indexingMaxCount = plasmaSwitchboard.indexingSlots; + int indexingMaxCount = (int) sb.getConfigLong(plasmaSwitchboard.INDEXER_SLOTS, 30); int indexingPercent = (indexingMaxCount==0)?0:indexingJobCount*100/indexingMaxCount; prop.putNum("indexingQueueSize", indexingJobCount); prop.putNum("indexingQueueMax", indexingMaxCount); prop.put("indexingQueuePercent",(indexingPercent>100) ? 100 : indexingPercent); int loaderJobCount = sb.crawlQueues.size(); - int loaderMaxCount = plasmaSwitchboard.crawlSlots; + int loaderMaxCount = Integer.parseInt(sb.getConfig(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, "10")); int loaderPercent = (loaderMaxCount==0)?0:loaderJobCount*100/loaderMaxCount; prop.putNum("loaderQueueSize", loaderJobCount); prop.putNum("loaderQueueMax", loaderMaxCount); diff --git a/htroot/xml/queues_p.java b/htroot/xml/queues_p.java index 0661ead95..82105b2ac 100644 --- a/htroot/xml/queues_p.java +++ b/htroot/xml/queues_p.java @@ -88,7 +88,7 @@ public class queues_p { //indexing queue prop.putNum("indexingSize", sb.getThread(plasmaSwitchboard.INDEXER).getJobCount()+sb.indexingTasksInProcess.size()); - prop.putNum("indexingMax", plasmaSwitchboard.indexingSlots); + prop.putNum("indexingMax", (int) sb.getConfigLong(plasmaSwitchboard.INDEXER_SLOTS, 30)); prop.putNum("urlpublictextSize", sb.wordIndex.loadedURL.size()); prop.putNum("rwipublictextSize", sb.wordIndex.size()); if ((sb.sbQueue.size() == 0) && (sb.indexingTasksInProcess.size() == 0)) { @@ -140,7 +140,7 @@ public class queues_p { //loader queue prop.put("loaderSize", Integer.toString(sb.crawlQueues.size())); - prop.put("loaderMax", Integer.toString(plasmaSwitchboard.crawlSlots)); + prop.put("loaderMax", sb.getConfig(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, "10")); if (sb.crawlQueues.size() == 0) { prop.put("list-loader", "0"); } else { diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java index ef4174acb..f72d3bce8 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java @@ -134,12 +134,12 @@ public class plasmaCrawlQueues { //log.logDebug("CoreCrawl: queue is empty"); return false; } - if (sb.sbQueue.size() >= plasmaSwitchboard.indexingSlots) { + if (sb.sbQueue.size() >= (int) sb.getConfigLong(plasmaSwitchboard.INDEXER_SLOTS, 30)) { log.logFine("CoreCrawl: too many processes in indexing queue, dismissed (" + "sbQueueSize=" + sb.sbQueue.size() + ")"); return false; } - if (this.size() >= plasmaSwitchboard.crawlSlots) { + if (this.size() >= sb.getConfigLong(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, 10)) { log.logFine("CoreCrawl: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + ")"); return false; @@ -230,12 +230,12 @@ public class plasmaCrawlQueues { // check local indexing queues // in case the placing of remote crawl fails, there must be space in the local queue to work off the remote crawl - if (sb.sbQueue.size() >= plasmaSwitchboard.indexingSlots * 2) { + if (sb.sbQueue.size() >= (int) sb.getConfigLong(plasmaSwitchboard.INDEXER_SLOTS, 30) * 2) { log.logFine("LimitCrawl: too many processes in indexing queue, dismissed (" + "sbQueueSize=" + sb.sbQueue.size() + ")"); return false; } - if (this.size() >= plasmaSwitchboard.crawlSlots) { + if (this.size() >= sb.getConfigLong(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, 10)) { log.logFine("LimitCrawl: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + ")"); return false; @@ -318,12 +318,12 @@ public class plasmaCrawlQueues { //log.logDebug("GlobalCrawl: queue is empty"); return false; } - if (sb.sbQueue.size() >= plasmaSwitchboard.indexingSlots) { + if (sb.sbQueue.size() >= (int) sb.getConfigLong(plasmaSwitchboard.INDEXER_SLOTS, 30)) { log.logFine("GlobalCrawl: too many processes in indexing queue, dismissed (" + "sbQueueSize=" + sb.sbQueue.size() + ")"); return false; } - if (this.size() >= plasmaSwitchboard.crawlSlots) { + if (this.size() >= sb.getConfigLong(plasmaSwitchboard.CRAWLER_THREADS_ACTIVE_MAX, 10)) { log.logFine("GlobalCrawl: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.size() + ")"); return false; diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 4164bce76..670ca4506 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -85,12 +85,14 @@ public final class plasmaCrawlStacker extends Thread { private File cacheStacksPath; private long preloadTime; private int dbtype; - + private boolean prequeue; + // objects for the prefetch task private ArrayList dnsfetchHosts = new ArrayList(); - public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, long preloadTime, int dbtype) { + public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, long preloadTime, int dbtype, boolean prequeue) { this.sb = sb; + this.prequeue = prequeue; // init the message list this.urlEntryHashCache = new LinkedList(); @@ -168,13 +170,11 @@ public final class plasmaCrawlStacker extends Thread { } public void close() { - try { + if (this.dbtype == QUEUE_DB_TYPE_RAM) { this.log.logFine("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait."); while (size() > 0) { if (!job()) break; } - } catch (Exception e1) { - this.log.logSevere("Unable to shutdown all remaining stackCrawl threads", e1); } terminateDNSPrefetcher(); @@ -240,7 +240,7 @@ public final class plasmaCrawlStacker extends Thread { synchronized(this.urlEntryHashCache) { kelondroRow.Entry oldValue; - prefetchHost(nexturl.getHost()); + if (prequeue) prefetchHost(nexturl.getHost()); try { oldValue = this.urlEntryCache.put(newEntry.toRow()); } catch (IOException e) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d35c79bec..6e555378c 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -175,9 +175,7 @@ import de.anomic.yacy.yacySeed; public final class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch { // load slots - public static int crawlSlots = 10; - public static int indexingSlots = 30; - public static int stackCrawlSlots = 2000; + public static int xstackCrawlSlots = 2000; private int dhtTransferIndexCount = 100; @@ -410,6 +408,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public static final String CRAWLSTACK_METHOD_FREEMEM = null; public static final String CRAWLSTACK_IDLESLEEP = "82_crawlstack_idlesleep"; public static final String CRAWLSTACK_BUSYSLEEP = "82_crawlstack_busysleep"; + public static final String CRAWLSTACK_SLOTS = "stacker.slots"; // 90_cleanup /** @@ -1170,9 +1169,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // create queue this.sbQueue = new plasmaSwitchboardQueue(this.wordIndex.loadedURL, new File(this.plasmaPath, "switchboardQueue2.stack"), this.profilesActiveCrawls); - // setting the indexing queue slots - indexingSlots = (int) getConfigLong(INDEXER_SLOTS, 30); - // create in process list this.indexingTasksInProcess = new HashMap(); @@ -1204,7 +1200,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // start a loader log.logConfig("Starting Crawl Loader"); - crawlSlots = Integer.parseInt(getConfig(CRAWLER_THREADS_ACTIVE_MAX, "10")); this.crawlQueues = new plasmaCrawlQueues(this, plasmaPath); /* @@ -1307,7 +1302,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // initializing the stackCrawlThread - this.crawlStacker = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL_time, (int) getConfigLong("tableTypeForPreNURL", 0)); + this.crawlStacker = new plasmaCrawlStacker(this, this.plasmaPath, ramPreNURL_time, (int) getConfigLong("tableTypeForPreNURL", 0), (((int) getConfigLong("tableTypeForPreNURL", 0) == 0) && (getConfigLong(CRAWLSTACK_BUSYSLEEP, 0) <= 100))); //this.sbStackCrawlThread = new plasmaStackCrawlThread(this,this.plasmaPath,ramPreNURL); //this.sbStackCrawlThread.start(); @@ -1850,7 +1845,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser return doneSomething; // nothing to do } - if (crawlStacker.size() >= stackCrawlSlots) { + if (crawlStacker.size() >= getConfigLong(CRAWLSTACK_SLOTS, 2000)) { log.logFine("deQueue: too many processes in stack crawl thread queue (" + "stackCrawlQueue=" + crawlStacker.size() + ")"); return doneSomething; } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 86252ad1b..c51746636 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -60,7 +60,7 @@ public final class plasmaWordIndex implements indexRI { // environment constants public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes - public static final int wCacheMaxChunk = 1000; // number of references for each urlhash + public static final int wCacheMaxChunk = 400; // maximum number of references for each urlhash public static final int lowcachedivisor = 320; public static final int maxCollectionPartition = 7; // should be 7 diff --git a/yacy.init b/yacy.init index 44ea51367..6b9f76986 100644 --- a/yacy.init +++ b/yacy.init @@ -734,6 +734,9 @@ crawler.MaxActiveThreads = 30 indexer.slots = 40 indexer.slots__pro = 80 +# maximum size of stacker queue +stacker.slots = 2000 + # specifies if yacy should set it's own referer if no referer URL # was set by the client. useYacyReferer = true @@ -888,6 +891,7 @@ currentSkin= # temporary flag for new database structure. set only true for testing # ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION # table-types: RAM = 0, TREE = 1, FLEX = 2; +# if you set this to a non-RAM value, you should increase the stacker.slots value tableTypeForPreNURL=0 # flag to show if pages shall be usable for non-admin users