<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <title>YaCy '#[clientname]#': Crawler</title>
  #%env/templates/metas.template%#
  <!-- Scripts for the live crawler monitor; the body onload handler calls initCrawler(),
       presumably defined in js/Crawler.js (confirm). -->
  <script type="text/javascript" src="js/ajax.js"></script>
  <script type="text/javascript" src="js/xml.js"></script>
  <script type="text/javascript" src="js/html.js"></script>
  <script type="text/javascript" src="js/rss2.js"></script>
  <script type="text/javascript" src="js/query.js"></script>
  <script type="text/javascript" src="js/Crawler.js"></script>
  <!-- style for hypertree -->
  <link href="env/hypertree.css" rel="stylesheet" />
</head>
<body id="Crawler" onload="initCrawler();">
  #%env/templates/header.template%#
  #%env/templates/submenuCrawlMonitor.template%#
  <!-- Link to the machine-readable crawler status -->
  <div id="api">
    <a href="api/status_p.xml" id="apilink"><img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
    <span>Click on this API button to see an XML with information about the crawler status</span>
  </div>
  <h2>Crawler</h2>
  <noscript><p>(Please enable JavaScript to automatically update this page!)</p></noscript>
<!-- Queue overview: one row per crawler queue with its size and a state link/icon.
     Sizes are pre-filled by the template; the state anchors and images start empty and are
     presumably filled in by the page scripts (confirm against js/Crawler.js). -->
<fieldset id="queues" style="width:210px;float:left;">
  <legend>Queues</legend>
  <table border="0" class="watchCrawler">
    <tbody>
      <tr class="TableHeader">
        <th width="120">Queue<br /></th>
        <th width="60">Size<br /></th>
        <th width="30"><span class="glyphicon glyphicon-wrench"></span>
        </th>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Local Crawler</td>
        <td align="right"><span id="localcrawlerqueuesize">#[localCrawlSize]#</span></td>
        <td>
          <a href="" id="localcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="localcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Limit Crawler</td>
        <td align="right"><span id="limitcrawlerqueuesize">#[limitCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="limitcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="limitcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Remote Crawler</td>
        <td align="right"><span id="remotecrawlerqueuesize">#[remoteCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="remotecrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="remotecrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">No-Load Crawler</td>
        <td align="right"><span id="noloadcrawlerqueuesize">#[noloadCrawlSize]#</span></td>
        <td>
          <a href="" title="" id="noloadcrawlerstateA">
            <img src="" alt="" style="width:12px; height:12px;" id="noloadcrawlerstateIMG" />
          </a>
        </td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Loader (<span id="loaderqueuemax">#[loaderMax]#</span>)</td>
        <td align="right"><span id="loaderqueuesize">#[loaderSize]#</span></td>
        <td></td>
      </tr>
    </tbody>
  </table>
  <!-- Optional "terminate all" form; the click handler asks for confirmation before submitting -->
  #(terminate-button)#::
  <form action="/Crawler_p.html" method="get" role="form">
    <input type="hidden" name="queues_terminate_all" value="" />
    <button type="submit" class="btn btn-danger" onclick="return confirm('Confirm Termination of All Crawls')"><span class="glyphicon glyphicon-remove-circle"></span> Terminate All</button>
  </form>
  #(/terminate-button)#
</fieldset>
<!-- Index size overview: entry and segment counts per database, pre-filled by the template -->
<fieldset id="indexsize" style="width:240px;float:left;">
  <legend>Index Size</legend>
  <table border="0" class="watchCrawler">
    <tbody>
      <tr class="TableHeader">
        <th width="130">Database<br /></th>
        <th width="50">Entries<br /></th>
        <th width="40">Seg-<br />ments</th>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Documents<br /><a href="#[urlpublictextSolrURL]#">solr search api</a></td>
        <td align="right"><span id="urlpublictextSize">#[urlpublictextSize]#</span></td>
        <td align="right"><span id="urlpublictextSegmentCount">#[urlpublictextSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Webgraph Edges<br /><a href="#[webgraphSolrURL]#">solr search api</a></td>
        <td align="right"><span id="webgraphSize">#[webgraphSize]#</span></td>
        <td align="right"><span id="webgraphSegmentCount">#[webgraphSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">Citations<br />(reverse link index)</td>
        <td align="right"><span id="citationSize">#[citationSize]#</span></td>
        <td align="right"><span id="citationSegmentCount">#[citationSegmentCount]#</span></td>
      </tr>
      <tr class="TableCellLight">
        <td align="left">RWIs<br />(P2P Chunks)</td>
        <td align="right"><span id="rwipublictextSize">#[rwipublictextSize]#</span></td>
        <td align="right"><span id="rwipublictextSegmentCount">#[rwipublictextSegmentCount]#</span></td>
      </tr>
    </tbody>
  </table>
</fieldset>
<!-- Progress box: crawl speed settings (submitted back to this page) plus live indicators.
     The empty spans and the progress bars are placeholders addressed by id; they are
     presumably updated by the page scripts (confirm against js/Crawler.js). -->
<fieldset id="progress" style="width:500px;float:left;">
  <legend>Progress</legend>
  <form action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8">
    <table border="0" class="watchCrawler">
      <tbody>
        <tr class="TableHeader">
          <th width="160">Indicator<br /></th>
          <th width="300" colspan="4">Level<br /></th>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Speed / PPM<br />(Pages Per Minute)</td>
          <td align="left" colspan="4">
            <input id="customPPM" name="customPPM" type="number" min="10" max="30000" style="width:62px" value="#[customPPMdefault]#" /> PPM
            <input id="latencyFactor" name="latencyFactor" type="number" min="0.1" max="3.0" step="0.1" style="width:40px" value="#[latencyFactorDefault]#" /> LF
            <input id="MaxSameHostInQueue" name="MaxSameHostInQueue" type="number" min="1" max="30" style="width:32px" value="#[MaxSameHostInQueueDefault]#" /> MH
            <input type="submit" name="crawlingPerformance" value="set" />
            (<a href="Crawler_p.html?crawlingPerformance=minimum">min</a>/<a href="Crawler_p.html?crawlingPerformance=maximum">max</a>)
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Crawler PPM</td>
          <td align="left" width="60"><span id="ppmNum"></span></td>
          <td align="left" width="260px" colspan="3">
            <!-- progress is not a void element, so it needs an explicit end tag -->
            <progress id="ppmbar" max="30000" value="0" style="width:94%;"></progress>
          </td>
        </tr>
        <tr class="TableCellLight">
          <!-- the label cell spans this row and the "pending" row below -->
          <td align="left" valign="top" rowspan="2">Postprocessing Progress <span id="postprocessing_speed"></span><br /><span id="postprocessing_status"></span></td>
          <td align="left" width="40"><span id="postprocessing_remainingTimeMinutes">0</span>:<span id="postprocessing_remainingTimeSeconds">0</span></td>
          <td align="left" width="260px" colspan="3">
            <span id="postprocessing_bar"><progress id="postprocessingBar" max="30000" value="0" style="width:94%;"></progress></span>
          </td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">pending:</td>
          <td align="left">collection=<span id="postprocessing_collection"></span></td>
          <td align="left">webgraph=<span id="postprocessing_webgraph"></span></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Traffic (Crawler)</td>
          <td align="left"><span id="trafficCrawler"></span> MB</td>
          <td colspan="3"></td>
        </tr>
        <tr class="TableCellLight">
          <td align="left">Load</td>
          <td align="left"><span id="load"></span></td>
          <td colspan="3"></td>
        </tr>
      </tbody>
    </table>
  </form>
</fieldset>
<script type="text/javascript">
  // Set the three floated summary boxes (progress, indexsize, queues) to the height of
  // the tallest one. Runs inline, after all three fieldsets have been parsed.
  var maxh = Math.max(document.getElementById("progress").clientHeight, document.getElementById("indexsize").clientHeight, document.getElementById("queues").clientHeight);
  document.getElementById("indexsize").style.height = maxh + "px";
  document.getElementById("progress").style.height = maxh + "px";
  document.getElementById("queues").style.height = maxh + "px";
</script>
<!-- Status message of the last crawl request; the template selects one numbered alternative -->
<p class="watchCrawler" style="clear:both;">
  #(info)#
  <!-- 0 -->
  ::
  <!-- 1 -->
  Error with profile management. Please stop YaCy, delete the file DATA/PLASMADB/crawlProfiles0.db
  and restart. ::
  <!-- 2 -->
  Error: #[errmsg]# ::
  <!-- 3 -->
  Application not yet initialized. Sorry. Please wait some seconds and repeat
  the request. ::
  <!-- 4 -->
  <strong>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with
  crawl root "#[crawlingStart]#".</strong> Please try again with different
  filter. ::
  <!-- 5 -->
  Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br />
  ::
  <!-- 6 -->
  Error with URL input "#[crawlingStart]#": #[error]# ::
  <!-- 7 -->
  Error with file input "#[crawlingStart]#": #[error]# ::
  <!-- 8 -->
  Crawling of "#[crawlingURL]#" started. <strong>Please wait some seconds,
  it may take some seconds until the first result appears there.</strong>
  If you crawl any un-wanted pages, you can delete them <a href="IndexCreateQueues_p.html?stack=LOCAL">here</a>.<br />
  #(/info)#
</p>
<!-- crawl queues -->
#(info-queue)#::<div class="alert alert-warning">#[message]#</div>#(/info-queue)#
<!-- crawl profile list -->
#(crawlProfilesShow)#::
<fieldset>
  <legend>Running Crawls (#[count]#)</legend>
  <table border="0" summary="A list of crawl profiles and their current settings.">
    <colgroup>
      <col width="16" />
      <col width="140" />
    </colgroup>
    <tr class="TableHeader">
      <td><strong>Name</strong></td>
      #(debug)#::<td><strong>Count</strong></td>#(/debug)#
      <td><strong>Status</strong></td>
    </tr>
    #{list}#
    <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
      <td>#[name]#</td>
      #(debug)#::<td>#[count]#</td>#(/debug)#
      <td>#(terminateButton)#::
        <div style="text-decoration:blink;float:left;">Running</div>
        <form style="float:left;" action="Crawler_p.html" method="get" enctype="multipart/form-data" accept-charset="UTF-8"><div>
          <input type="hidden" name="handle" value="#[handle]#" />
          <input type="submit" name="terminate" value="Terminate" class="btn btn-danger btn-xs" />
        </div></form>
        #(/terminateButton)#
      </td>
    </tr>
    #{/list}#
  </table>
  <!-- Link structure view: alternative 1 draws a hypertree, alternative 2 an animated server-rendered picture -->
  #(linkstructure)#::
  <script src="/js/d3.v3.min.js"></script>
  <script src="/js/hypertree.js"></script>
  <div id="linkstructure"></div>
  <!-- Pass a callback to ready(); the previous form invoked linkstructure() on the spot and
       handed its return value to ready(). Assumes jQuery is provided by an included template. -->
  <script>$(document).ready(function () { linkstructure("#[hosts]#", "#linkstructure", 1280, 720, 3000, 700); });</script>::
  <script type="text/javascript">
    // Base URL of the web structure picture; doanimation() appends a running idx parameter.
    imagestub = "WebStructurePicture_p.png?host=#[hosts]#&depth=4&width=1024&height=512&nodes=600&time=1000&colortext=888888&colorback=FFFFFF&colordot0=1111BB&colordota=11BB11&colorline=222222&colorlineend=333333";
    idx = 0;
    setTimeout(function () { doanimation(500); }, 500);

    // Points the WebPicture image at the stub URL with an incremented idx (presumably so the
    // browser fetches a fresh picture each round - confirm), then re-schedules itself after
    // nexttimeout milliseconds. The delay handed to the next round is 1.2 times the current
    // one, or 3000 once the current one exceeds 3000. Stops when the image is not in the page.
    function doanimation(nexttimeout) {
      var accessPicture = document.getElementById("WebPicture");
      if (accessPicture != null) {
        idx++;
        accessPicture.src = imagestub + "&idx=" + idx;
        var followingTimeout = nexttimeout > 3000 ? 3000 : nexttimeout * 1.2;
        setTimeout(function () { doanimation(followingTimeout); }, nexttimeout);
      }
    }
  </script>
  <div style="clear:both; text-align:left;">
    <img id="WebPicture" src="env/grafics/invisible.png" />
  </div>
  #(/linkstructure)#
  <h3>Crawled Pages</h3>
  <p id="crawllist"></p>
</fieldset>
#(/crawlProfilesShow)#
#%env/templates/footer.template%#
</body>
</html>