2012-02-02 21:33:42 +01:00
// status_p
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 18.12.2006 on http://www.anomic.de
// this file was created using an implementation from IndexCreate_p.java, published 02.12.2004
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2008-08-25 20:11:47 +02:00
2013-11-16 08:23:42 +01:00
import java.io.IOException ;
2010-08-23 14:32:02 +02:00
import net.yacy.cora.protocol.RequestHeader ;
2013-07-23 18:20:07 +02:00
import net.yacy.cora.util.Memory ;
2013-07-12 14:16:51 +02:00
import net.yacy.crawler.CrawlSwitchboard ;
import net.yacy.crawler.data.CrawlProfile ;
import net.yacy.kelondro.index.RowHandleSet ;
2010-09-11 17:58:15 +02:00
import net.yacy.kelondro.io.ByteCount ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.MemoryControl ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.workflow.WorkflowProcessor ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
import net.yacy.search.SwitchboardConstants ;
2013-11-16 18:23:14 +01:00
import net.yacy.search.index.Fulltext ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.index.Segment ;
2014-06-02 15:06:38 +02:00
import net.yacy.search.schema.CollectionConfiguration ;
2013-11-16 08:23:42 +01:00
import net.yacy.search.schema.CollectionSchema ;
import net.yacy.search.schema.WebgraphSchema ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
2006-02-06 20:47:59 +01:00
public class status_p {
2011-11-25 15:09:25 +01:00
2012-02-02 21:33:42 +01:00
public static final String STATE_RUNNING = " running " ;
public static final String STATE_PAUSED = " paused " ;
2011-11-25 15:09:25 +01:00
2012-07-05 09:14:04 +02:00
public static serverObjects respond ( @SuppressWarnings ( " unused " ) final RequestHeader header , final serverObjects post , final serverSwitch env ) {
2006-02-06 20:47:59 +01:00
// return variable that accumulates replacements
2009-07-19 22:37:44 +02:00
final Switchboard sb = ( Switchboard ) env ;
2008-08-02 14:12:04 +02:00
final serverObjects prop = new serverObjects ( ) ;
2011-11-25 15:09:25 +01:00
final boolean html = post ! = null & & post . containsKey ( " html " ) ;
2010-04-27 23:47:41 +02:00
prop . setLocalized ( html ) ;
2012-06-28 14:27:29 +02:00
Segment segment = sb . index ;
2013-11-16 18:23:14 +01:00
Fulltext fulltext = segment . fulltext ( ) ;
2011-11-25 15:09:25 +01:00
2007-10-24 23:38:19 +02:00
prop . put ( " rejected " , " 0 " ) ;
2008-06-04 23:34:57 +02:00
sb . updateMySeed ( ) ;
2009-07-19 22:37:44 +02:00
final int cacheMaxSize = ( int ) sb . getConfigLong ( SwitchboardConstants . WORDCACHE_MAX_COUNT , 10000 ) ;
2013-11-16 18:23:14 +01:00
prop . put ( " ppm " , Switchboard . currentPPM ( ) ) ; // we don't format the ppm here because that will cause that the progress bar shows nothing if the number is > 999
2009-05-28 16:26:05 +02:00
prop . putNum ( " qpm " , sb . peers . mySeed ( ) . getQPM ( ) ) ;
2012-08-31 13:03:00 +02:00
prop . putNum ( " wordCacheSize " , segment . RWIBufferCount ( ) ) ;
2010-04-27 23:47:41 +02:00
prop . putNum ( " wordCacheMaxSize " , cacheMaxSize ) ;
2013-07-17 19:09:25 +02:00
2006-11-03 20:44:31 +01:00
// memory usage and system attributes
2014-02-27 00:31:13 +01:00
prop . putNum ( " usedMemory " , MemoryControl . used ( ) ) ;
2009-01-30 16:33:00 +01:00
prop . putNum ( " freeMemory " , MemoryControl . free ( ) ) ;
prop . putNum ( " totalMemory " , MemoryControl . total ( ) ) ;
2011-07-19 14:48:50 +02:00
prop . putNum ( " maxMemory " , MemoryControl . maxMemory ( ) ) ;
2014-02-27 00:31:13 +01:00
prop . putNum ( " usedDisk " , sb . observer . getSizeOfDataPath ( true ) ) ;
prop . putNum ( " freeDisk " , sb . observer . getUsableSpace ( ) ) ;
2009-10-11 02:12:19 +02:00
prop . putNum ( " processors " , WorkflowProcessor . availableCPU ) ;
2013-07-23 18:20:07 +02:00
prop . putNum ( " load " , Memory . load ( ) ) ;
2014-02-27 00:31:13 +01:00
2006-11-03 20:44:31 +01:00
// proxy traffic
2010-09-11 17:58:15 +02:00
prop . put ( " trafficIn " , ByteCount . getGlobalCount ( ) ) ;
prop . put ( " trafficProxy " , ByteCount . getAccountCount ( ByteCount . PROXY ) ) ;
prop . put ( " trafficCrawler " , ByteCount . getAccountCount ( ByteCount . CRAWLER ) ) ;
2006-11-03 20:44:31 +01:00
2012-02-02 21:33:42 +01:00
// index size
2013-11-16 18:23:14 +01:00
prop . putNum ( " urlpublictextSize " , fulltext . collectionSize ( ) ) ;
prop . putNum ( " urlpublictextSegmentCount " , fulltext . getDefaultConnector ( ) . getSegmentCount ( ) ) ;
2013-12-04 01:54:45 +01:00
prop . putNum ( " webgraphSize " , fulltext . useWebgraph ( ) ? fulltext . webgraphSize ( ) : 0 ) ;
prop . putNum ( " webgraphSegmentCount " , fulltext . useWebgraph ( ) ? fulltext . getWebgraphConnector ( ) . getSegmentCount ( ) : 0 ) ;
2013-11-16 08:23:42 +01:00
prop . putNum ( " citationSize " , segment . citationCount ( ) ) ;
prop . putNum ( " citationSegmentCount " , segment . citationSegmentCount ( ) ) ;
2012-08-31 13:03:00 +02:00
prop . putNum ( " rwipublictextSize " , segment . RWICount ( ) ) ;
2013-07-23 18:03:33 +02:00
prop . putNum ( " rwipublictextSegmentCount " , segment . RWISegmentCount ( ) ) ;
2012-02-02 21:33:42 +01:00
// loader queue
2014-03-03 22:13:40 +01:00
prop . putNum ( " loaderSize " , sb . crawlQueues . activeWorkerEntries ( ) . size ( ) ) ;
2012-02-02 21:33:42 +01:00
prop . putNum ( " loaderMax " , sb . getConfigLong ( SwitchboardConstants . CRAWLER_THREADS_ACTIVE_MAX , 10 ) ) ;
//local crawl queue
prop . putNum ( " localCrawlSize " , sb . getThread ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL ) . getJobCount ( ) ) ;
prop . put ( " localCrawlState " , sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL ) ? STATE_PAUSED : STATE_RUNNING ) ;
//global crawl queue
prop . putNum ( " limitCrawlSize " , sb . crawlQueues . limitCrawlJobSize ( ) ) ;
prop . put ( " limitCrawlState " , STATE_RUNNING ) ;
//remote crawl queue
prop . putNum ( " remoteCrawlSize " , sb . getThread ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL ) . getJobCount ( ) ) ;
prop . put ( " remoteCrawlState " , sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL ) ? STATE_PAUSED : STATE_RUNNING ) ;
//noload crawl queue
prop . putNum ( " noloadCrawlSize " , sb . crawlQueues . noloadCrawlJobSize ( ) ) ;
prop . put ( " noloadCrawlState " , STATE_RUNNING ) ;
2013-07-12 14:16:51 +02:00
// generate crawl profile table
int count = 0 ;
final int domlistlength = ( post = = null ) ? 160 : post . getInt ( " domlistlength " , 160 ) ;
CrawlProfile profile ;
// put active crawls into list
String hosts = " " ;
for ( final byte [ ] h : sb . crawler . getActive ( ) ) {
profile = sb . crawler . getActive ( h ) ;
if ( CrawlSwitchboard . DEFAULT_PROFILES . contains ( profile . name ( ) ) ) continue ;
profile . putProfileEntry ( " crawlProfiles_list_ " , prop , true , false , count , domlistlength ) ;
RowHandleSet urlhashes = sb . crawler . getURLHashes ( h ) ;
prop . put ( " crawlProfiles_list_ " + count + " _count " , urlhashes = = null ? " unknown " : Integer . toString ( urlhashes . size ( ) ) ) ;
if ( profile . urlMustMatchPattern ( ) = = CrawlProfile . MATCH_ALL_PATTERN ) {
hosts = hosts + " , " + profile . name ( ) ;
}
count + + ;
}
prop . put ( " crawlProfiles_list " , count ) ;
prop . put ( " crawlProfiles_count " , count ) ;
prop . put ( " crawlProfiles " , count = = 0 ? 0 : 1 ) ;
2013-11-16 08:23:42 +01:00
2014-06-02 15:06:38 +02:00
prop . put ( " postprocessingRunning " , CollectionConfiguration . postprocessingRunning ? 1 : 0 ) ;
2013-07-12 14:29:22 +02:00
2013-12-04 01:54:45 +01:00
boolean processCollection = sb . index . fulltext ( ) . getDefaultConfiguration ( ) . contains ( CollectionSchema . process_sxt ) & & ( sb . index . connectedCitation ( ) | | sb . index . fulltext ( ) . useWebgraph ( ) ) ;
boolean processWebgraph = sb . index . fulltext ( ) . getWebgraphConfiguration ( ) . contains ( WebgraphSchema . process_sxt ) & & sb . index . fulltext ( ) . useWebgraph ( ) ;
2013-11-16 08:23:42 +01:00
2014-06-02 15:06:38 +02:00
long timeSinceStart = ( processCollection | | processWebgraph ) & & CollectionConfiguration . postprocessingRunning ? System . currentTimeMillis ( ) - CollectionConfiguration . postprocessingStartTime : 0 ;
//postprocessingCollection1Count = 0;
//postprocessingsWebgraphCount = 0;
long collectionRemainingCount = 0 , webgraphRemainingCount = 0 ;
if ( processCollection ) try { collectionRemainingCount = sb . index . fulltext ( ) . getDefaultConnector ( ) . getCountByQuery ( CollectionConfiguration . collection1query ( sb . index , null ) ) ; } catch ( IOException e ) { }
if ( processWebgraph ) try { webgraphRemainingCount = sb . index . fulltext ( ) . getWebgraphConnector ( ) . getCountByQuery ( CollectionConfiguration . webgraphquery ( sb . index , null ) ) ; } catch ( IOException e ) { }
long countSinceStart = CollectionConfiguration . postprocessingRunning ? CollectionConfiguration . postprocessingCollection1Count + CollectionConfiguration . postprocessingWebgraphCount - collectionRemainingCount - webgraphRemainingCount : 0 ;
int speed = timeSinceStart = = 0 ? 0 : ( int ) ( 60000 * countSinceStart / timeSinceStart ) ; // pages per minute
long remainingTime = speed = = 0 ? 0 : 60000 * collectionRemainingCount / speed ; // millis
int remainingTimeMinutes = ( int ) ( remainingTime / 60000 ) ;
int remainingTimeSeconds = ( int ) ( ( remainingTime - ( remainingTimeMinutes * 60000 ) ) / 1000 ) ;
2013-11-16 08:23:42 +01:00
2013-11-16 18:23:14 +01:00
prop . put ( " postprocessingCollectionRemainingCount " , collectionRemainingCount ) ;
2013-11-16 08:23:42 +01:00
prop . put ( " postprocessingWebgraphRemainingCount " , webgraphRemainingCount ) ;
2014-06-02 15:06:38 +02:00
prop . put ( " postprocessingRunning_activity " , collectionRemainingCount = = CollectionConfiguration . postprocessingCollection1Count & & webgraphRemainingCount = = CollectionConfiguration . postprocessingWebgraphCount ? " citation computation " : collectionRemainingCount = = CollectionConfiguration . postprocessingCollection1Count ? " webgraph " : " collection " ) ;
prop . put ( " postprocessingSpeed " , speed ) ;
prop . put ( " postprocessingElapsedTime " , timeSinceStart ) ;
prop . put ( " postprocessingRemainingTime " , remainingTime ) ;
prop . put ( " postprocessingRemainingTimeMinutes " , ( remainingTimeMinutes < 10 ? " 0 " : " " ) + Integer . toString ( remainingTimeMinutes ) ) ;
prop . put ( " postprocessingRemainingTimeSeconds " , ( remainingTimeSeconds < 10 ? " 0 " : " " ) + Integer . toString ( remainingTimeSeconds ) ) ;
2013-11-16 08:23:42 +01:00
2006-02-06 20:47:59 +01:00
// return rewrite properties
return prop ;
}
2011-11-25 15:09:25 +01:00
2006-02-06 20:47:59 +01:00
}