2008-07-07 02:40:45 +02:00
// ResourceObserver.java
// -----------------------
2010-02-06 19:48:06 +01:00
// (c) David Wieditz; lotus at mail.berlios.de
// first published 6.2.2010
//
// based on the former code (c) by Detlef Reichl; detlef!reichl()gmx!org
2008-07-07 02:40:45 +02:00
// Pforzheim, Germany, 2008
//
2010-02-06 19:48:06 +01:00
// part of YaCy
2008-10-31 12:33:17 +01:00
//
2008-07-07 02:40:45 +02:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2012-09-21 15:48:16 +02:00
package net.yacy.search ;
2008-06-06 15:10:21 +02:00
2010-02-06 19:48:06 +01:00
import java.io.File ;
2014-02-12 01:00:44 +01:00
import java.io.IOException ;
2008-06-06 15:10:21 +02:00
2014-02-11 03:20:03 +01:00
import org.apache.commons.io.FileUtils ;
2013-01-25 04:24:36 +01:00
import net.yacy.cora.document.WordCache ;
import net.yacy.cora.protocol.Domains ;
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2014-02-12 01:00:44 +01:00
import net.yacy.crawler.data.Cache ;
import net.yacy.crawler.data.ResultURLs ;
import net.yacy.data.WorkTables ;
import net.yacy.kelondro.data.word.WordReference ;
import net.yacy.kelondro.rwi.IndexCell ;
2009-11-06 20:13:30 +01:00
import net.yacy.kelondro.util.MemoryControl ;
2014-02-12 01:00:44 +01:00
import net.yacy.peers.NewsPool ;
2013-01-25 04:24:36 +01:00
import net.yacy.search.query.SearchEventCache ;
2008-06-06 15:10:21 +02:00
2010-02-06 19:48:06 +01:00
public class ResourceObserver {
2013-07-09 14:28:25 +02:00
public static final ConcurrentLog log = new ConcurrentLog ( " RESOURCE OBSERVER " ) ;
2012-02-23 01:20:42 +01:00
2014-02-12 01:00:44 +01:00
// status type for which shows where in the control-circuit model a memory state can be categorized
2011-04-12 07:02:36 +02:00
public enum Space implements Comparable < Space > {
2014-02-12 01:00:44 +01:00
EXHAUSTED , // smallest space state, outside of over/undershot
NOMINAL , // wanted-space state between steady-state and under/overshot
AMPLE ; // largest space state, below steady-state
2011-04-12 07:02:36 +02:00
}
2012-02-23 01:20:42 +01:00
2010-02-06 19:48:06 +01:00
private final Switchboard sb ;
private final File path ; // path to check
2012-02-23 01:20:42 +01:00
2014-02-12 01:00:44 +01:00
private Space normalizedDiskFree = Space . AMPLE ;
private Space normalizedDiskUsed = Space . AMPLE ;
private Space normalizedMemoryFree = Space . AMPLE ;
2012-02-23 01:20:42 +01:00
2010-02-06 19:48:06 +01:00
public ResourceObserver ( final Switchboard sb ) {
2008-06-06 15:10:21 +02:00
this . sb = sb ;
2014-02-12 01:00:44 +01:00
this . path = sb . getDataPath ( SwitchboardConstants . INDEX_PRIMARY_PATH , " " ) . getParentFile ( ) ;
2013-07-09 14:28:25 +02:00
log . info ( " path for disc space measurement: " + this . path ) ;
2009-05-21 10:30:34 +02:00
}
2012-02-23 01:20:42 +01:00
2009-05-21 10:30:34 +02:00
public static void initThread ( ) {
2010-02-06 19:48:06 +01:00
final Switchboard sb = Switchboard . getSwitchboard ( ) ;
sb . observer = new ResourceObserver ( Switchboard . getSwitchboard ( ) ) ;
2009-05-21 10:30:34 +02:00
sb . observer . resourceObserverJob ( ) ;
2010-02-06 19:48:06 +01:00
}
2012-02-23 01:20:42 +01:00
2008-11-01 09:56:58 +01:00
/ * *
* checks the resources and pauses crawls if necessary
* /
2008-06-19 14:40:44 +02:00
public void resourceObserverJob ( ) {
2011-08-22 19:50:03 +02:00
MemoryControl . setProperMbyte ( getMinFreeMemory ( ) ) ;
2010-02-06 19:48:06 +01:00
2014-02-12 01:00:44 +01:00
this . normalizedDiskFree = getNormalizedDiskFree ( ) ;
this . normalizedDiskUsed = getNormalizedDiskUsed ( true ) ;
2012-02-23 01:20:42 +01:00
this . normalizedMemoryFree = getNormalizedMemoryFree ( ) ;
2010-02-06 19:48:06 +01:00
2014-02-12 01:00:44 +01:00
// take actions if disk space is below AMPLE
if ( this . normalizedDiskFree ! = Space . AMPLE | |
this . normalizedDiskUsed ! = Space . AMPLE | |
this . normalizedMemoryFree ! = Space . AMPLE ) {
2012-11-06 15:21:56 +01:00
String reason = " " ;
2014-02-12 01:00:44 +01:00
if ( this . normalizedDiskFree ! = Space . AMPLE ) reason + = " not enough disk space, " + getUsableSpace ( ) ;
if ( this . normalizedDiskUsed ! = Space . AMPLE ) reason + = " too high disk usage, " + getNormalizedDiskUsed ( true ) ;
if ( this . normalizedMemoryFree ! = Space . AMPLE ) reason + = " not enough memory space " ;
2012-09-06 22:10:03 +02:00
if ( ! this . sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL ) ) {
2013-07-09 14:28:25 +02:00
log . info ( " pausing local crawls " ) ;
2012-11-06 15:21:56 +01:00
this . sb . pauseCrawlJob ( SwitchboardConstants . CRAWLJOB_LOCAL_CRAWL , " resource observer: " + reason ) ;
2012-09-06 22:10:03 +02:00
}
if ( ! this . sb . crawlJobIsPaused ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL ) ) {
2013-07-09 14:28:25 +02:00
log . info ( " pausing remote triggered crawls " ) ;
2012-11-06 15:21:56 +01:00
this . sb . pauseCrawlJob ( SwitchboardConstants . CRAWLJOB_REMOTE_TRIGGERED_CRAWL , " resource observer: " + reason ) ;
2012-09-06 22:10:03 +02:00
}
2010-02-06 19:48:06 +01:00
2014-02-12 01:00:44 +01:00
if ( ( this . normalizedDiskFree = = Space . EXHAUSTED | | this . normalizedMemoryFree ! = Space . AMPLE ) & & this . sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , false ) ) {
2013-07-09 14:28:25 +02:00
log . info ( " disabling index receive " ) ;
2012-02-23 01:20:42 +01:00
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , false ) ;
this . sb . peers . mySeed ( ) . setFlagAcceptRemoteIndex ( false ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , true ) ;
2010-02-06 19:48:06 +01:00
}
}
2012-02-23 01:20:42 +01:00
2014-02-12 01:00:44 +01:00
// shrink resources if space is EXHAUSTED
if ( ( this . normalizedDiskFree = = Space . EXHAUSTED & & this . sb . getConfigBool ( SwitchboardConstants . RESOURCE_DISK_FREE_AUTOREGULATE , false ) ) | |
( this . normalizedDiskUsed = = Space . EXHAUSTED & & this . sb . getConfigBool ( SwitchboardConstants . RESOURCE_DISK_USED_AUTOREGULATE , false ) ) ) {
shrinkmethods : while ( true /*this is not a loop, just a construct that we can leave with a break*/ ) {
// delete old releases
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// delete fetched snippets
log . info ( " DISK SPACE EXHAUSTED - deleting snippet cache " ) ;
sb . tables . clear ( WorkTables . TABLE_SEARCH_FAILURE_NAME ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// clear HTCACHE
log . info ( " DISK SPACE EXHAUSTED - deleting HTCACHE " ) ;
Cache . clear ( ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// delete logs
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// delete robots.txt
log . info ( " DISK SPACE EXHAUSTED - deleting robots.txt database " ) ;
try { sb . robots . clear ( ) ; } catch ( final IOException e ) { }
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// delete news
log . info ( " DISK SPACE EXHAUSTED - deleting News database " ) ;
sb . peers . newsPool . clear ( NewsPool . INCOMING_DB ) ; sb . peers . newsPool . clear ( NewsPool . PROCESSED_DB ) ;
sb . peers . newsPool . clear ( NewsPool . OUTGOING_DB ) ; sb . peers . newsPool . clear ( NewsPool . PUBLISHED_DB ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
// clear citations
if ( sb . index . connectedCitation ( ) ) {
log . info ( " DISK SPACE EXHAUSTED - deleting citations " ) ;
try { sb . index . urlCitation ( ) . clear ( ) ; } catch ( final IOException e ) { }
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
// throw away crawl queues, if they are large
if ( sb . crawlQueues . coreCrawlJobSize ( ) > 1000 ) {
log . info ( " DISK SPACE EXHAUSTED - deleting crawl queues " ) ;
sb . crawlQueues . clear ( ) ;
sb . crawlStacker . clear ( ) ;
ResultURLs . clearStacks ( ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
// cut away too large RWIs
IndexCell < WordReference > termIndex = sb . index . termIndex ( ) ;
try {
int shrinkedReferences = termIndex . deleteOld ( 100 , 10000 ) ;
if ( shrinkedReferences > 0 ) {
log . info ( " DISK SPACE EXHAUSTED - shrinked " + shrinkedReferences + " RWI references to a maximum of 100 " ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break ;
}
} catch ( IOException e ) {
}
// delete too old RWIs
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// delete fulltext from large Solr documents
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
// run a solr optimize
this . sb . index . fulltext ( ) . commit ( false ) ;
this . sb . index . fulltext ( ) . optimize ( 1 ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
/ *
// delete old Solr documents
long day = 1000 * 60 * 60 * 24 ;
for ( int t = 12 ; t > = 1 ; t - - ) {
log . info ( " DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " months " ) ;
this . sb . index . fulltext ( ) . deleteOldDocuments ( t * 30 * day , true ) ;
this . sb . index . fulltext ( ) . commit ( false ) ;
this . sb . index . fulltext ( ) . optimize ( 1 ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
}
for ( int t = 30 ; t > 3 ; t - - ) {
log . info ( " DISK SPACE EXHAUSTED - deleting documents with loaddate > " + t + " days " ) ;
this . sb . index . fulltext ( ) . deleteOldDocuments ( t * day , true ) ;
this . sb . index . fulltext ( ) . commit ( false ) ;
this . sb . index . fulltext ( ) . optimize ( 1 ) ;
if ( getNormalizedDiskFree ( ) = = Space . AMPLE & & getNormalizedDiskUsed ( false ) = = Space . AMPLE ) break shrinkmethods ;
}
* /
// WE SHOULD NEVER GET UP TO HERE...
/ *
// delete ALL RWIs
if ( sb . index . termIndex ( ) ! = null ) {
try { sb . index . termIndex ( ) . clear ( ) ; } catch ( final IOException e ) { }
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
}
// delete full Solr
try { sb . index . fulltext ( ) . clearLocalSolr ( ) ; } catch ( final IOException e ) { }
//if (getNormalizedDiskFree() == Space.AMPLE && getNormalizedDiskUsed(false) == Space.AMPLE) break;
* /
break ; // DO NOT REMOVE THIS, the loop may run forever. It shall run only once.
}
this . normalizedDiskFree = getNormalizedDiskFree ( ) ;
this . normalizedDiskUsed = getNormalizedDiskUsed ( false ) ;
this . normalizedMemoryFree = getNormalizedMemoryFree ( ) ;
}
// normalize state if the resources are AMPLE
if ( this . normalizedDiskFree = = Space . AMPLE & & this . normalizedDiskUsed = = Space . AMPLE & & this . normalizedMemoryFree = = Space . AMPLE ) {
if ( this . sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , false ) ) { // we were wrong!
log . info ( " enabling index receive " ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , true ) ;
this . sb . peers . mySeed ( ) . setFlagAcceptRemoteIndex ( true ) ;
this . sb . setConfig ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , false ) ;
}
log . info ( " resources ok " ) ;
}
2010-02-06 19:48:06 +01:00
}
2012-02-23 01:20:42 +01:00
2014-02-11 03:20:03 +01:00
private long sizeOfDirectory_lastCountTime = 0 ;
private long sizeOfDirectory_lastCountValue = 0 ;
2014-02-12 01:00:44 +01:00
public long getSizeOfDataPath ( final boolean cached ) {
if ( cached & & System . currentTimeMillis ( ) - this . sizeOfDirectory_lastCountTime < 600000 ) return this . sizeOfDirectory_lastCountValue ;
2014-02-11 03:20:03 +01:00
this . sizeOfDirectory_lastCountTime = System . currentTimeMillis ( ) ;
2014-04-04 15:26:39 +02:00
try {
this . sizeOfDirectory_lastCountValue = FileUtils . sizeOfDirectory ( this . path ) ;
} catch ( Throwable e ) { } // org.apache.commons.io.FileUtils.sizeOf calls sizes of files which are there temporary and may cause an exception. Thats a bug inside FileUtils
2014-02-11 03:20:03 +01:00
return this . sizeOfDirectory_lastCountValue ;
}
public long getUsableSpace ( ) {
return this . path . getUsableSpace ( ) ;
}
2014-02-12 01:00:44 +01:00
private Space getNormalizedDiskUsed ( final boolean cached ) {
final long currentUsed = getSizeOfDataPath ( cached ) ;
//final long currentSpace = getUsableSpace(this.path);
if ( currentUsed < 1L ) return Space . AMPLE ;
Space ret = Space . AMPLE ;
if ( currentUsed > getMaxUsedDiskOvershot ( ) ) {
log . warn ( " Volume " + this . path . toString ( ) + " : used space ( " + ( currentUsed / 1024 / 1024 ) + " MB) is too high (> " + ( getMaxUsedDiskOvershot ( ) / 1024 / 1024 ) + " MB) " ) ;
ret = Space . EXHAUSTED ;
return ret ;
}
if ( currentUsed > getMaxUsedDiskSteadystate ( ) ) {
log . info ( " Volume " + this . path . toString ( ) + " : used space ( " + ( currentUsed / 1024 / 1024 ) + " MB) is high, but nominal (> " + ( getMaxUsedDiskSteadystate ( ) / 1024 / 1024 ) + " MB) " ) ;
ret = Space . NOMINAL ;
return ret ;
}
return ret ;
}
2014-02-11 03:20:03 +01:00
2010-02-06 19:48:06 +01:00
/ * *
* returns the amount of disk space available
* @return < ul >
* < li > < code > HIGH < / code > if disk space is available < / li >
* < li > < code > MEDIUM < / code > if low disk space is available < / li >
* < li > < code > LOW < / code > if lower than hardlimit disk space is available < / li >
* < / ul >
* /
2011-04-12 07:02:36 +02:00
private Space getNormalizedDiskFree ( ) {
2014-02-11 03:20:03 +01:00
final long currentSpace = getUsableSpace ( ) ;
2011-07-03 20:13:37 +02:00
//final long currentSpace = getUsableSpace(this.path);
2014-02-12 01:00:44 +01:00
if ( currentSpace < 1L ) return Space . AMPLE ; // this happens if the function does not work, like on Windows
Space ret = Space . AMPLE ;
2012-02-23 01:20:42 +01:00
2014-02-12 01:00:44 +01:00
if ( currentSpace < getMinFreeDiskUndershot ( ) ) {
log . warn ( " Volume " + this . path . toString ( ) + " : free space ( " + ( currentSpace / 1024 / 1024 ) + " MB) is too low (< " + ( getMinFreeDiskSteadystate ( ) / 1024 / 1024 ) + " MB) " ) ;
ret = Space . EXHAUSTED ;
return ret ;
}
if ( currentSpace < getMinFreeDiskSteadystate ( ) ) {
log . info ( " Volume " + this . path . toString ( ) + " : free space ( " + ( currentSpace / 1024 / 1024 ) + " MB) is low, but nominal (< " + ( getMinFreeDiskSteadystate ( ) / 1024 / 1024 ) + " MB) " ) ;
ret = Space . NOMINAL ;
return ret ;
2010-02-06 19:48:06 +01:00
}
return ret ;
}
2012-02-23 01:20:42 +01:00
2011-04-12 07:02:36 +02:00
private Space getNormalizedMemoryFree ( ) {
2014-02-12 01:00:44 +01:00
if ( MemoryControl . properState ( ) ) return Space . AMPLE ;
2013-08-08 14:38:26 +02:00
// clear some caches - @all: are there more of these, we could clear here?
2013-11-07 21:30:17 +01:00
this . sb . index . clearCaches ( ) ;
2013-08-08 14:38:26 +02:00
SearchEventCache . cleanupEvents ( true ) ;
this . sb . trail . clear ( ) ;
Switchboard . urlBlacklist . clearblacklistCache ( ) ;
WordCache . clearCommonWords ( ) ;
Domains . clear ( ) ;
2014-02-12 01:00:44 +01:00
return MemoryControl . properState ( ) ? Space . AMPLE : Space . EXHAUSTED ;
2008-06-06 15:10:21 +02:00
}
2012-02-23 01:20:42 +01:00
2008-11-01 09:56:58 +01:00
/ * *
* @return < code > true < / code > if disk space is available
* /
2010-02-06 19:48:06 +01:00
public boolean getDiskAvailable ( ) {
2014-02-12 01:00:44 +01:00
return this . normalizedDiskFree = = Space . AMPLE ;
2008-06-06 15:10:21 +02:00
}
2012-02-23 01:20:42 +01:00
2008-11-01 09:56:58 +01:00
/ * *
* @return < code > true < / code > if memory is available
* /
2010-02-06 19:48:06 +01:00
public boolean getMemoryAvailable ( ) {
2014-02-12 01:00:44 +01:00
return this . normalizedMemoryFree = = Space . AMPLE ;
}
/ * *
* @return amount of space ( bytes ) that should be used in steady state
* /
public long getMaxUsedDiskSteadystate ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_USED_MAX_STEADYSTATE , 524288 ) /* MB */ * 1024L * 1024L ;
2008-06-06 15:10:21 +02:00
}
2012-02-23 01:20:42 +01:00
2008-06-19 14:40:44 +02:00
/ * *
2014-02-12 01:00:44 +01:00
* @return amount of space ( bytes ) that should at least be kept free as hard limit ; the limit when autoregulation to steady state should start
2008-06-19 14:40:44 +02:00
* /
2014-02-12 01:00:44 +01:00
public long getMaxUsedDiskOvershot ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_USED_MAX_OVERSHOT , 1048576 ) /* MB */ * 1024L * 1024L ;
}
/ * *
* @return amount of space ( bytes ) that should be kept free as steady state
* /
public long getMinFreeDiskSteadystate ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_FREE_MIN_STEADYSTATE , 2048 ) /* MB */ * 1024L * 1024L ;
2009-10-31 20:12:53 +01:00
}
2012-02-23 01:20:42 +01:00
2009-10-31 20:12:53 +01:00
/ * *
2014-02-12 01:00:44 +01:00
* @return amount of space ( bytes ) that should at least be kept free as hard limit ; the limit when autoregulation to steady state should start
2009-10-31 20:12:53 +01:00
* /
2014-02-12 01:00:44 +01:00
public long getMinFreeDiskUndershot ( ) {
return this . sb . getConfigLong ( SwitchboardConstants . RESOURCE_DISK_FREE_MIN_UNDERSHOT , 1024 ) /* MB */ * 1024L * 1024L ;
2008-06-06 15:10:21 +02:00
}
2012-02-23 01:20:42 +01:00
2010-01-10 20:04:43 +01:00
/ * *
2011-01-02 21:38:01 +01:00
* @return amount of space ( MiB ) that should at least be free
2010-01-10 20:04:43 +01:00
* /
public long getMinFreeMemory ( ) {
2012-02-23 01:20:42 +01:00
return this . sb . getConfigLong ( SwitchboardConstants . MEMORY_ACCEPTDHT , 0 ) ;
2010-01-10 20:04:43 +01:00
}
2012-02-23 01:20:42 +01:00
2010-02-06 19:48:06 +01:00
}