2016-10-03 21:57:02 +02:00
<!DOCTYPE html>
2016-10-07 10:46:20 +02:00
< html >
2007-01-23 16:35:36 +01:00
<head>
  <title>YaCy '#[clientname]#': Crawler</title>
  #%env/templates/metas.template%#
  <!-- Script files used by this page, loaded in the order listed. -->
  <script type="text/javascript" src="js/ajax.js"></script>
  <script type="text/javascript" src="js/xml.js"></script>
  <script type="text/javascript" src="js/html.js"></script>
  <script type="text/javascript" src="js/rss2.js"></script>
  <script type="text/javascript" src="js/query.js"></script>
  <script type="text/javascript" src="js/Crawler.js"></script>
  <!-- style for hypertree -->
  <link href="env/hypertree.css" rel="stylesheet">
</head>
2012-05-25 01:45:38 +02:00
< body id = "Crawler" onload = "initCrawler();" >
2012-11-07 12:52:19 +01:00
2014-03-18 13:42:31 +01:00
#%env/templates/header.template%#
#%env/templates/submenuCrawlMonitor.template%#
2012-11-07 12:52:19 +01:00
<!-- Link to the XML status API, followed by the page heading and a hint for browsers without JavaScript. -->
<div id="api">
  <a href="api/status_p.xml" id="apilink"><img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
  <span>Click on this API button to see an XML with information about the crawler status</span>
</div>
<h2>Crawler</h2>
<noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
2014-04-22 23:14:54 +02:00
<!-- Queue overview: one row per crawler queue with its size and a state link/icon. -->
<!-- NOTE(review): the state links and icons start with empty href/src; presumably page script fills them in - confirm. -->
<fieldset id="queues" style="width:210px;float:left;">
  <legend>Queues</legend>
  <table border="0" class="watchCrawler">
    <thead>
      <tr class="TableHeader">
        <th width="120">Queue<br/></th>
        <th width="60">Size<br/></th>
        <th width="30"><span class="glyphicon glyphicon-wrench"></span></th>
      </tr>
    </thead>
    <tbody>
      <tr class="TableCellLight">
        <td align="left"><a href="IndexCreateQueues_p.html?stack=LOCAL">Local Crawler</a></td>
        <td align="right"><span id="localcrawlerqueuesize">#[localCrawlSize]#</span></td>
        <td>
          <a href="" id="localcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="localcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Limit Crawler</td>
        <td align="right"><span id="limitcrawlerqueuesize">#[limitCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="limitcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="limitcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left"><a href="IndexCreateQueues_p.html?stack=REMOTE">Remote Crawler</a></td>
        <td align="right"><span id="remotecrawlerqueuesize">#[remoteCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="remotecrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="remotecrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left"><a href="IndexCreateQueues_p.html?stack=NOLOAD">No-Load Crawler</a></td>
        <td align="right"><span id="noloadcrawlerqueuesize">#[noloadCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="noloadcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="noloadcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left"><a href="IndexCreateLoaderQueue_p.html">Loader</a> (<a href="PerformanceQueues_p.html#ThreadPoolSettings"><span id="loaderqueuemax">#[loaderMax]#</span></a>)</td>
        <td align="right"><span id="loaderqueuesize">#[loaderSize]#</span></td>
        <td></td>
      </tr>
    </tbody>
  </table>
  #(terminate-button)#::
  <form action="Crawler_p.html" method="get">
    <input type="hidden" name="queues_terminate_all" value="" />
    <button type="submit" class="btn btn-danger" onclick="return confirm('Confirm Termination of All Crawls')"><span class="glyphicon glyphicon-remove-circle"></span> Terminate All</button>
  </form>
  #(/terminate-button)#
</fieldset>
2014-04-22 23:14:54 +02:00
<!-- Index size overview: entry and segment counts per database. -->
<fieldset id="indexsize" style="width:240px;float:left;">
  <legend>Index Size</legend>
  <table border="0" class="watchCrawler">
    <thead>
      <tr class="TableHeader">
        <th width="130">Database<br/></th>
        <th width="50">Entries<br/></th>
        <th width="40">Seg-<br/>ments</th>
      </tr>
    </thead>
    <tbody>
      <tr class="TableCellLight">
        <td align="left">Documents<br/><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
        <td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
        <td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Webgraph Edges<br/><a href="#[webgraphSolrURL]#">solr search api</a></td>
        <td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
        <td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Citations<br/>(reverse link index)</td>
        <td align="right"><span id="citationSize">#[citationSize]#</span></td>
        <td align="right"><span id="citationSegmentCount">#[citationSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">RWIs<br/>(P2P Chunks)</td>
        <td align="right"><span id="rwipublictextSize">#[rwipublictextSize]#</span></td>
        <td align="right"><span id="rwipublictextSegmentCount">#[rwipublictextSegmentCount]#</span></td>
      </tr>
    </tbody>
  </table>
</fieldset>
2017-01-05 14:54:59 +01:00
<!-- Progress panel: crawl speed settings form plus live indicators (PPM, postprocessing, traffic, load). -->
<!-- NOTE(review): the empty spans and the progress bars are presumably filled in by page script - confirm. -->
<fieldset id="progress" style="width:530px;float:left;">
  <legend>Progress</legend>
  <form action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
    <table border="0" class="watchCrawler">
      <thead>
        <tr class="TableHeader">
          <th width="160">Indicator<br/></th>
          <th width="300" colspan="4">Level<br/></th>
        </tr>
      </thead>
      <tbody>
        <tr class="TableCellLight">
          <td align="left">Speed / PPM<br/>(Pages Per Minute)</td>
          <td align="left" colspan="4">
            <input id="customPPM" name="customPPM" type="number" min="10" max="30000" style="width:5em" value="#[customPPMdefault]#" />
            <label for="customPPM"><abbr title="Pages Per Minute">PPM</abbr></label>
            <input id="latencyFactor" name="latencyFactor" type="number" min="0.1" max="3.0" step="0.1" style="width:3.5em" value="#[latencyFactorDefault]#" />
            <label for="latencyFactor"><abbr title="Latency Factor">LF</abbr></label>
            <input id="MaxSameHostInQueue" name="MaxSameHostInQueue" type="number" min="1" max="30" style="width:3em" value="#[MaxSameHostInQueueDefault]#" />
            <label for="MaxSameHostInQueue"><abbr title="Max same Host in queue">MH</abbr></label>
            <input type="submit" name="crawlingPerformance" value="set" />
            (<a href="Crawler_p.html?crawlingPerformance=minimum" title="Set PPM to the default minimum value">min</a>/<a href="Crawler_p.html?crawlingPerformance=maximum" title="Set PPM to the default maximum value">max</a>)
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Crawler PPM</td>
          <td align="left" width="60"><span id="ppmNum"></span></td>
          <td align="left" width="260px" colspan="3">
            <!-- progress is not a void element, so it needs an explicit end tag -->
            <progress id="ppmbar" max="30000" value="0" style="width:94%;"></progress>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left" valign="top" rowspan="2">Postprocessing Progress <span id="postprocessing_speed"></span><br/><span id="postprocessing_status"></span></td>
          <td align="left" width="40"><span id="postprocessing_remainingTimeMinutes">0</span>:<span id="postprocessing_remainingTimeSeconds">0</span></td>
          <td align="left" width="260px" colspan="3">
            <span id="postprocessing_bar"><progress id="postprocessingBar" max="100" value="0" style="width:94%;"></progress></span>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">pending:</td>
          <td align="left">collection=<span id="postprocessing_collection"></span></td>
          <td align="left">webgraph=<span id="postprocessing_webgraph"></span></td>
          <td></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Traffic (Crawler)</td>
          <td align="left" colspan="2"><span id="trafficCrawler"></span> MB</td>
          <td colspan="2"></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Load</td>
          <td align="left" colspan="2"><span id="load"></span></td>
          <td colspan="2"></td>
        </tr>
      </tbody>
    </table>
  </form>
</fieldset>
2014-04-22 23:14:54 +02:00
2014-03-16 14:56:30 +01:00
< script >
2015-01-02 00:11:32 +01:00
/**
 * Gives the "progress", "indexsize" and "queues" elements one common height.
 *
 * The target height is the largest clientHeight among the second child
 * element of each of the three containers, plus 42 pixels. The styles are
 * only rewritten when that target differs from the one applied last time.
 *
 * The last applied height is remembered on the function object itself. A
 * `var` declared inside the function body would be hoisted and reset to
 * undefined on every call, which would make the change check always succeed.
 */
function setTableSize() {
  var containerIds = ["progress", "indexsize", "queues"];
  var contentHeights = containerIds.map(function (id) {
    return document.getElementById(id).children[1].clientHeight;
  });
  var maxh = Math.max.apply(null, contentHeights) + 42;

  // Nothing changed since the previous run: leave the styles alone.
  if (setTableSize.lastMaxh === maxh) {
    return;
  }
  setTableSize.lastMaxh = maxh;

  containerIds.forEach(function (id) {
    document.getElementById(id).style.height = maxh + "px";
  });
}
// Re-run the height adjustment once per second. A function reference is
// passed instead of a source string so nothing has to be evaluated on each tick.
window.setInterval(setTableSize, 1000);
2014-03-16 14:56:30 +01:00
< / script >
2012-05-25 01:45:38 +02:00
<!-- Status and error message area: exactly one of the numbered alternatives below is rendered. -->
<p class="watchCrawler" style="clear:both;">
  #(info)#
  <!-- 0 -->
  ::
  <!-- 1 -->
  Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db
  and restart. ::
  <!-- 2 -->
  Error: #[errmsg]# ::
  <!-- 3 -->
  Application not yet initialized. Sorry. Please wait some seconds and repeat
  the request. ::
  <!-- 4 -->
  <strong>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with
  crawl root "#[crawlingStart]#".</strong> Please try again with different
  filter. ::
  <!-- 5 -->
  Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br>
  ::
  <!-- 6 -->
  Error with URL input "#[crawlingStart]#": #[error]# ::
  <!-- 7 -->
  Error with file input "#[crawlingStart]#": #[error]# ::
  <!-- 8 -->
  Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds,
  it may take some seconds until the first result appears there.</strong>
  If you crawl any un-wanted pages, you can delete them <a href="IndexCreateQueues_p.html?stack=LOCAL">here</a>.<br/> ::
  <!-- 9 -->
  No embedded local Solr index is connected. This is required to use a Solr query filter.
  You can configure this with the <a href="IndexFederated_p.html">Index Sources & targets</a> page.::
  <!-- 10 -->
  The Solr filter query syntax is not valid : <code>#[solrQuery]#</code> ::
  <!-- 11 -->
  Could not parse the Solr filter query : <code>#[solrQuery]#</code>
  #(/info)#
</p>
2018-06-19 10:12:20 +02:00
2018-07-06 11:30:30 +02:00
<!-- Warning about remote crawl results not being indexed locally; the first template alternative is empty. -->
#(wontReceiptRemoteResults)#::
<div class="alert alert-warning">
  <p>You asked for remote indexing, but remote crawl results won't be added to the local index as the remote crawler is currently disabled on this peer.</p>
  <p>You can activate it in the <a href="RemoteCrawl_p.html">Remote Crawl Configuration</a> page.</p>
</div>
#(/wontReceiptRemoteResults)#
2018-06-19 10:12:20 +02:00
<!-- #(noEmbeddedSolr)#::<div class="alert alert-error">No embedded local Solr index is connected. This is required to use the Solr filter query.
You can configure this with the <a href="IndexFederated_p.html">Index Sources & targets</a> page.</div>
#(/noEmbeddedSolr)#
#(solrQuerySyntaxtError)#::<div class="alert alert-error">The Solr filter query syntax is not valid : #[solrQuery]#</div>
#(/solrQuerySyntaxtError)#-->
2007-01-23 16:35:36 +01:00
<!-- crawl queues -->
2014-04-22 23:14:54 +02:00
#(info-queue)#::<div class="alert alert-warning">#[message]#</div>#(/info-queue)#
2012-05-23 18:00:37 +02:00
<!-- crawl profile list -->
#(crawlProfilesShow)#::
< fieldset >
2016-10-04 03:55:49 +02:00
< legend id = "runningCrawlsLegend" > Running Crawls (#[count]#)< / legend >
2015-01-28 03:59:01 +01:00
< table width = "96%" >
< tr > < td >
2016-10-03 21:55:38 +02:00
<!-- Running crawl profiles: one row per list entry, with an optional count column and an optional terminate form. -->
<table border="0" summary="A list of crawl profiles and their current settings." id="crawlProfiles">
  <colgroup>
    <col width="16" />
    <col width="140" />
  </colgroup>
  <thead>
    <tr class="TableHeader">
      <th><strong>Name</strong></th>
      #(debug)#::<th id="headerDebug"><strong>Count</strong></th>#(/debug)#
      <th><strong>Status</strong></th>
    </tr>
  </thead>
  <tbody>
    #{list}#
    <tr class="TableCell#(dark)#Light::Dark#(/dark)#" id="#[handle]#">
      <td>#[name]#</td>
      #(debug)#::<td>#[count]#</td>#(/debug)#
      <td id="#[handle]#_status_cell">#(terminateButton)#::
        <div id="#[handle]#_status" style="text-decoration:blink;float:left;">Running</div>
        <form id="#[handle]#_terminate" style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
          <div>
            <input type="hidden" name="handle" value="#[handle]#" />
            <input type="submit" name="terminate" value="Terminate" class="btn btn-danger btn-xs" />
          </div>
        </form>
        #(/terminateButton)#
      </td>
    </tr>
    #{/list}#
  </tbody>
</table>
< / td >
#(linkstructure)#
<td>
  <!-- The form is closed with an end tag; a second start tag here would leave it open. -->
  <form style="float:right;" action="Crawler_p.html"><input type="submit" name="showwebstructuregraph" class="btn btn-default btn-xs" value="show link structure" /></form>
</td></tr></table>
::
<td>
  <!-- The form is closed with an end tag; a second start tag here would leave it open. -->
  <form style="float:right;" action="Crawler_p.html"><input type="submit" name="hidewebstructuregraph" class="btn btn-default btn-xs" value="hide graphic" /></form>
</td></tr></table>
2018-10-28 10:07:46 +01:00
<script src="js/d3.v5.min.js"></script>
<script src="js/hypertree.js"></script>
<div id="linkstructure"></div>
<script>
  // Hand ready() a callback. Calling linkstructure(...) directly in the
  // argument list would run it at once and pass only its return value on.
  $(document).ready(function () {
    linkstructure("#[hosts]#", "#linkstructure", 1280, 720, 3000, 700);
  });
</script>::
2015-01-28 03:59:01 +01:00
<td>
  <!-- The form is closed with an end tag; a second start tag here would leave it open. -->
  <form style="float:right;" action="Crawler_p.html"><input type="submit" name="hidewebstructuregraph" class="btn btn-default btn-xs" value="hide graphic" /></form>
</td></tr></table>
2012-10-23 02:50:26 +02:00
< script type = "text/javascript" >
2012-10-25 10:18:28 +02:00
// Base URL of the generated picture and a request counter. Both are assigned
// without a declaration, so they are shared with doanimation() as globals.
imagestub = "WebStructurePicture_p.png?host=#[hosts]#&depth=4&width=1024&height=512&nodes=600&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333";
idx = 0;
// Start the refresh loop after 500 ms. A function is passed rather than a
// source string so the timer does not have to evaluate code.
setTimeout(function () { doanimation(500); }, 500);
/**
 * Reloads the picture in the element with id "WebPicture" and schedules
 * the next reload.
 *
 * Each run increments the global counter idx and appends it to the global
 * imagestub URL as the "idx" query parameter, so every request uses a
 * different URL. If the element is absent, nothing happens and the loop stops.
 *
 * @param {number} nexttimeout delay in milliseconds until the next run; the
 *   following run is asked to wait 1.2 times as long, or 3000 ms once the
 *   current delay is above 3000 ms.
 */
function doanimation(nexttimeout) {
  var accessPicture = document.getElementById("WebPicture");
  if (accessPicture == null) {
    return;
  }
  idx++;
  // No blank inside the separator: the parameter name must be exactly "idx".
  accessPicture.src = imagestub + "&idx=" + idx;

  var followingTimeout = nexttimeout > 3000 ? 3000 : nexttimeout * 1.2;
  setTimeout(function () {
    doanimation(followingTimeout);
  }, nexttimeout);
}
< / script >
<!-- Target image for the animation script; it starts with a placeholder source. -->
<div style="clear:both; text-align:left;">
  <img id="WebPicture" src="env/grafics/invisible.png" alt="Link structure graph" />
</div>
#(/linkstructure)#
2012-05-25 01:45:38 +02:00
<!-- NOTE(review): the crawllist paragraph is empty here; presumably page script fills it - confirm. -->
<h3>Crawled Pages</h3>
<p id="crawllist"></p>
2012-05-23 18:00:37 +02:00
< / fieldset >
#(/crawlProfilesShow)#
2007-01-23 16:35:36 +01:00
2012-05-25 01:45:38 +02:00
2007-01-23 16:35:36 +01:00
#%env/templates/footer.template%#
< / body >
< / html >