diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index a5e5315ea..48293d705 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -281,12 +281,12 @@ public class IndexCreate_p { if (post.containsKey("pausecrawlqueue")) { - switchboard.pauseCrawling(); + switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); prop.put("info", 4);//crawling paused } if (post.containsKey("continuecrawlqueue")) { - switchboard.continueCrawling(); + switchboard.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); prop.put("info", 5);//crawling continued } } @@ -454,7 +454,7 @@ public class IndexCreate_p { } - prop.put("crawler-paused",(switchboard.crawlingIsPaused())?0:1); + prop.put("crawler-paused",(switchboard.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL))?0:1); // return rewrite properties return prop; diff --git a/htroot/ScreenSaver.java b/htroot/ScreenSaver.java index 2c565dcfc..23da0d560 100644 --- a/htroot/ScreenSaver.java +++ b/htroot/ScreenSaver.java @@ -74,7 +74,9 @@ public class ScreenSaver { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { plasmaSwitchboard sb = (plasmaSwitchboard)env; - boolean crawlingStarted = false; + boolean localCrawlStarted = false; + boolean remoteTriggeredCrawlStarted = false; + boolean globalCrawlTriggerStarted = false; try { InputStream input = (InputStream) header.get("INPUTSTREAM"); OutputStream output = (OutputStream) header.get("OUTPUTSTREAM"); @@ -110,10 +112,18 @@ public class ScreenSaver { outputWriter.println(currentURL); } else if (line.equals("CONTINUECRAWLING")) { - if (sb.crawlingIsPaused()) { - crawlingStarted = true; - sb.continueCrawling(); + if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL)) { + localCrawlStarted = true; + sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); } + if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { + remoteTriggeredCrawlStarted = true; + sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + } + if (sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER)) { + globalCrawlTriggerStarted = true; + sb.continueCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); + } } else if (line.equals("EXIT")) { outputWriter.println("OK"); outputWriter.flush(); @@ -128,9 +138,15 @@ public class ScreenSaver { } catch (Exception e) { return null; } finally { - if (crawlingStarted) { - sb.pauseCrawling(); + if (localCrawlStarted) { + sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); } + if (remoteTriggeredCrawlStarted) { + sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + } + if (globalCrawlTriggerStarted) { + sb.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); + } } } diff --git a/htroot/Status.java b/htroot/Status.java index 4e8366c48..ce1b84f50 100644 --- a/htroot/Status.java +++ b/htroot/Status.java @@ -78,10 +78,22 @@ public class Status { prop.put("LOCATION",""); } } else { - if (post.containsKey("pausecrawlqueue")) { - ((plasmaSwitchboard)env).pauseCrawling(); - } else if (post.containsKey("continuecrawlqueue")) { - ((plasmaSwitchboard)env).continueCrawling(); + if (post.containsKey("pauseCrawlJob")) { + String jobType = (String) post.get("jobType"); + if (jobType.equals("localCrawl")) + ((plasmaSwitchboard)env).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); + else if (jobType.equals("remoteTriggeredCrawl")) + ((plasmaSwitchboard)env).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + else if (jobType.equals("globalCrawlTrigger")) + ((plasmaSwitchboard)env).pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); + } else if (post.containsKey("continueCrawlJob")) { + String jobType = (String) post.get("jobType"); + if (jobType.equals("localCrawl")) + ((plasmaSwitchboard)env).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); + else if (jobType.equals("remoteTriggeredCrawl")) + ((plasmaSwitchboard)env).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + else if (jobType.equals("globalCrawlTrigger")) + ((plasmaSwitchboard)env).continueCrawlJob(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER); } else if (post.containsKey("ResetTraffic")) { httpdByteCountInputStream.resetCount(); httpdByteCountOutputStream.resetCount(); @@ -278,13 +290,18 @@ public class Status { prop.put("indexingQueueMax", Integer.toString(plasmaSwitchboard.indexingSlots)); prop.put("loaderQueueSize", Integer.toString(sb.cacheLoader.size())); - prop.put("loaderQueueMax", Integer.toString(plasmaSwitchboard.crawlSlots)); - prop.put("loaderPaused",sb.crawlingIsPaused()?1:0); + prop.put("loaderQueueMax", Integer.toString(plasmaSwitchboard.crawlSlots)); - prop.put("localCrawlQueueSize", Integer.toString(sb.getThread("50_localcrawl").getJobCount())); + prop.put("localCrawlQueueSize", Integer.toString(sb.getThread(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL).getJobCount())); + prop.put("localCrawlPaused",sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL)?1:0); + + prop.put("remoteTriggeredCrawlQueueSize", Integer.toString(sb.getThread(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount())); + prop.put("remoteTriggeredCrawlPaused",sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)?1:0); + + prop.put("globalCrawlTriggerQueueSize", Integer.toString(sb.getThread(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER).getJobCount())); + prop.put("globalCrawlTriggerPaused",sb.crawlJobIsPaused(plasmaSwitchboard.CRAWLJOB_GLOBAL_CRAWL_TRIGGER)?1:0); + prop.put("stackCrawlQueueSize", Integer.toString(sb.sbStackCrawlThread.size())); - prop.put("remoteCrawlQueueSize", Integer.toString(sb.getThread("62_remotetriggeredcrawl").getJobCount())); - prop.put("remoteCrawlTriggerQueueSize", Integer.toString(sb.getThread("61_globalcrawltrigger").getJobCount())); // return rewrite properties prop.put("date",(new Date()).toString()); diff --git a/htroot/Status_p.inc b/htroot/Status_p.inc index 19e60cc03..436c8d193 100644 --- a/htroot/Status_p.inc +++ b/htroot/Status_p.inc @@ -1,4 +1,4 @@ - +
@@ -71,17 +71,42 @@ - + - - - - - - - - + + +
Private System Properties
Loader Queue#[loaderQueueSize]# | #[loaderQueueMax]# #(loaderPaused)#::(paused)#(/loaderPaused)# #[loaderQueueSize]# | #[loaderQueueMax]# [Details]
Crawler QueueEnqueued from: local=#[localCrawlQueueSize]# remote=#[remoteCrawlQueueSize]# | Pending: #[stackCrawlQueueSize]#[Details]
Remote Crawl Trigger Queue#[remoteCrawlTriggerQueueSize]#[Details]Crawler Queues + + + + + + + + + + + + + + + + + + + + + + + + + +
Local Crawl#[localCrawlQueueSize]##(localCrawlPaused)# ::(paused)#(/localCrawlPaused)#
Remote triggered Crawl#[remoteTriggeredCrawlQueueSize]##(remoteTriggeredCrawlPaused)# ::(paused)#(/remoteTriggeredCrawlPaused)#
Global Crawl Trigger#[globalCrawlTriggerQueueSize]##(globalCrawlTriggerPaused)# ::(paused)#(/globalCrawlTriggerPaused)#
Pending Crawl#[stackCrawlQueueSize]#  
+
[Details]
+  
+ [Details] +
\ No newline at end of file diff --git a/source/de/anomic/plasma/plasmaCrawlWorker.java b/source/de/anomic/plasma/plasmaCrawlWorker.java index def3467dd..d1a2e7634 100644 --- a/source/de/anomic/plasma/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/plasmaCrawlWorker.java @@ -530,7 +530,8 @@ public final class plasmaCrawlWorker extends Thread { } else if ((errorMsg != null) && (errorMsg.indexOf("There is not enough space on the disk") >= 0)) { log.logSevere("CRAWLER Not enough space on the disk detected while crawling '" + url.toString() + "'. " + "Pausing crawlers. "); - plasmaCrawlLoader.switchboard.pauseCrawling(); + plasmaCrawlLoader.switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); + plasmaCrawlLoader.switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); } else if ((errorMsg != null) && (errorMsg.indexOf("Network is unreachable") >=0)) { log.logSevere("CRAWLER Network is unreachable while trying to crawl URL '" + url.toString() + "'. "); } else if ((errorMsg != null) && (errorMsg.indexOf("No trusted certificate found")>= 0)) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c52b13b2e..e01e15c44 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -113,6 +113,7 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; +import java.util.Hashtable; import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -228,8 +229,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser private serverSemaphore shutdownSync = new serverSemaphore(0); private boolean terminate = false; - private Object crawlingPausedSync = new Object(); - private boolean crawlingIsPaused = false; + //private Object crawlingPausedSync = new Object(); + //private boolean crawlingIsPaused = false; + + public static final String CRAWLJOB_LOCAL_CRAWL = "50_localcrawl"; + public static final String CRAWLJOB_REMOTE_TRIGGERED_CRAWL = "62_remotetriggeredcrawl"; + public static final String CRAWLJOB_GLOBAL_CRAWL_TRIGGER = "61_globalcrawltrigger"; + private static final int CRAWLJOB_SYNC = 0; + private static final int CRAWLJOB_STATUS = 1; + + private Hashtable crawlJobsStatus = new Hashtable(); + private static plasmaSwitchboard sb; public plasmaSwitchboard(String rootPath, String initPath, String configPath) { @@ -421,11 +431,25 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // start a loader log.logConfig("Starting Crawl Loader"); - crawlSlots = Integer.parseInt(getConfig("crawler.MaxActiveThreads", "10")); - this.crawlingIsPaused = Boolean.valueOf(getConfig("crawler.isPaused", "false")).booleanValue(); plasmaCrawlLoader.switchboard = this; this.cacheLoader = new plasmaCrawlLoader(this.cacheManager, this.log); + + /* + * Creating sync objects and loading status for the crawl jobs + * a) local crawl + * b) remote triggered crawl + * c) global crawl trigger + */ + this.crawlJobsStatus.put(CRAWLJOB_LOCAL_CRAWL, new Object[]{ + new Object(), + Boolean.valueOf(getConfig(CRAWLJOB_LOCAL_CRAWL + "_isPaused", "false"))}); + this.crawlJobsStatus.put(CRAWLJOB_REMOTE_TRIGGERED_CRAWL, new Object[]{ + new Object(), + Boolean.valueOf(getConfig(CRAWLJOB_REMOTE_TRIGGERED_CRAWL + "_isPaused", "false"))}); + this.crawlJobsStatus.put(CRAWLJOB_GLOBAL_CRAWL_TRIGGER, new Object[]{ + new Object(), + Boolean.valueOf(getConfig(CRAWLJOB_GLOBAL_CRAWL_TRIGGER + "_isPaused", "false"))}); // starting board initMessages(ramMessage); @@ -956,32 +980,35 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser /** * With this function the crawling process can be paused */ - public void pauseCrawling() { - synchronized(this.crawlingPausedSync) { - this.crawlingIsPaused = true; + public void pauseCrawlJob(String jobType) { + Object[] status = (Object[])this.crawlJobsStatus.get(jobType); + synchronized(status[CRAWLJOB_SYNC]) { + status[CRAWLJOB_STATUS] = Boolean.TRUE; } - setConfig("crawler.isPaused", "true"); - } + setConfig(jobType + "_isPaused", "true"); + } /** * Continue the previously paused crawling */ - public void continueCrawling() { - synchronized(this.crawlingPausedSync) { - if (this.crawlingIsPaused) { - this.crawlingIsPaused = false; - this.crawlingPausedSync.notifyAll(); + public void continueCrawlJob(String jobType) { + Object[] status = (Object[])this.crawlJobsStatus.get(jobType); + synchronized(status[CRAWLJOB_SYNC]) { + if (((Boolean)status[CRAWLJOB_STATUS]).booleanValue()) { + status[CRAWLJOB_STATUS] = Boolean.FALSE; + status[CRAWLJOB_SYNC].notifyAll(); } } - setConfig("crawler.isPaused", "false"); - } + setConfig(jobType + "_isPaused", "false"); + } /** * @return true if crawling was paused or false otherwise */ - public boolean crawlingIsPaused() { - synchronized(this.crawlingPausedSync) { - return this.crawlingIsPaused; + public boolean crawlJobIsPaused(String jobType) { + Object[] status = (Object[])this.crawlJobsStatus.get(jobType); + synchronized(status[CRAWLJOB_SYNC]) { + return ((Boolean)status[CRAWLJOB_STATUS]).booleanValue(); } } @@ -1012,10 +1039,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} // if crawling was paused we have to wait until we wer notified to continue - synchronized(this.crawlingPausedSync) { - if (this.crawlingIsPaused) { + Object[] status = (Object[])this.crawlJobsStatus.get(CRAWLJOB_LOCAL_CRAWL); + synchronized(status[CRAWLJOB_SYNC]) { + if (((Boolean)status[CRAWLJOB_STATUS]).booleanValue()) { try { - this.crawlingPausedSync.wait(); + status[CRAWLJOB_SYNC].wait(); } catch (InterruptedException e){ return false;} } @@ -1090,10 +1118,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser //if (!(cacheManager.idle())) try {Thread.currentThread().sleep(2000);} catch (InterruptedException e) {} // if crawling was paused we have to wait until we wer notified to continue - synchronized(this.crawlingPausedSync) { - if (this.crawlingIsPaused) { + Object[] status = (Object[])this.crawlJobsStatus.get(CRAWLJOB_GLOBAL_CRAWL_TRIGGER); + synchronized(status[CRAWLJOB_SYNC]) { + if (((Boolean)status[CRAWLJOB_STATUS]).booleanValue()) { try { - this.crawlingPausedSync.wait(); + status[CRAWLJOB_SYNC].wait(); } catch (InterruptedException e){ return false;} } @@ -1152,12 +1181,13 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } // if crawling was paused we have to wait until we wer notified to continue - synchronized(this.crawlingPausedSync) { - if (this.crawlingIsPaused) { + Object[] status = (Object[])this.crawlJobsStatus.get(CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + synchronized(status[CRAWLJOB_SYNC]) { + if (((Boolean)status[CRAWLJOB_STATUS]).booleanValue()) { try { - this.crawlingPausedSync.wait(); + status[CRAWLJOB_SYNC].wait(); } - catch (InterruptedException e){ return false; } + catch (InterruptedException e){ return false;} } } diff --git a/yacy.init b/yacy.init index 34f4bae80..7e0b8de69 100644 --- a/yacy.init +++ b/yacy.init @@ -429,12 +429,15 @@ xpstopw=true 50_localcrawl_idlesleep=10000 50_localcrawl_busysleep=200 50_localcrawl_memprereq=1048576 +50_localcrawl_isPaused=false 61_globalcrawltrigger_idlesleep=10000 61_globalcrawltrigger_busysleep=200 61_globalcrawltrigger_memprereq=1048576 +61_globalcrawltrigger_isPaused=false 62_remotetriggeredcrawl_idlesleep=10000 62_remotetriggeredcrawl_busysleep=200 62_remotetriggeredcrawl_memprereq=1048576 +62_remotetriggeredcrawl_isPaused=false 70_cachemanager_idlesleep=5000 70_cachemanager_busysleep=0 70_cachemanager_memprereq=1048576 @@ -561,7 +564,6 @@ onlineCautionDelay=30000 crawler.acceptLanguage=en-us,en;q=0.5 crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7 crawler.clientTimeout=9000 -crawler.isPaused=false # maximum number of crawler threads crawler.MaxActiveThreads = 10