// SearchEventCache.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.query;

import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.SeedDB;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;

import de.anomic.data.WorkTables;
public class SearchEventCache {
2010-11-28 03:57:31 +01:00
private static ConcurrentMap < String , SearchEvent > lastEvents = new ConcurrentHashMap < String , SearchEvent > ( ) ; // a cache for objects from this class: re-use old search requests
2010-09-06 18:05:19 +02:00
public static final long eventLifetimeBigMem = 600000 ; // the time an event will stay in the cache when available memory is high, 10 Minutes
public static final long eventLifetimeMediumMem = 60000 ; // the time an event will stay in the cache when available memory is medium, 1 Minute
public static final long eventLifetimeShortMem = 10000 ; // the time an event will stay in the cache when memory is low, 10 seconds
2011-03-01 10:03:33 +01:00
public static final long memlimitHigh = 600 * 1024 * 1024 ; // 400 MB
public static final long memlimitMedium = 200 * 1024 * 1024 ; // 100 MB
2009-08-26 17:59:55 +02:00
public static String lastEventID = " " ;
2010-09-02 23:52:45 +02:00
public static long cacheInsert = 0 , cacheHit = 0 , cacheMiss = 0 , cacheDelete = 0 ;
2011-06-01 21:31:56 +02:00
2010-09-02 23:52:45 +02:00
public static int size ( ) {
return lastEvents . size ( ) ;
}
2011-06-01 21:31:56 +02:00
public static void put ( final String eventID , final SearchEvent event ) {
2011-05-26 16:35:32 +02:00
if ( MemoryControl . shortStatus ( ) ) cleanupEvents ( true ) ;
2009-08-26 17:59:55 +02:00
lastEventID = eventID ;
2011-06-01 21:31:56 +02:00
final SearchEvent oldEvent = lastEvents . put ( eventID , event ) ;
2010-09-02 23:52:45 +02:00
if ( oldEvent = = null ) cacheInsert + + ;
2009-08-26 17:59:55 +02:00
}
2011-06-01 21:31:56 +02:00
2011-11-24 17:05:09 +01:00
public static boolean delete ( final String urlhash ) {
for ( final SearchEvent event : lastEvents . values ( ) ) {
if ( event . result ( ) . delete ( urlhash ) ) return true ;
}
return false ;
}
2011-05-27 16:26:37 +02:00
public static void cleanupEvents ( boolean all ) {
2009-08-24 17:24:02 +02:00
// remove old events in the event cache
2011-05-27 16:26:37 +02:00
if ( MemoryControl . shortStatus ( ) ) all = true ;
2010-09-02 23:52:45 +02:00
// the less memory is there, the less time is acceptable for elements in the cache
2011-06-01 21:31:56 +02:00
final long memx = MemoryControl . available ( ) ;
final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem ;
2011-06-23 13:57:17 +02:00
Map . Entry < String , SearchEvent > eventEntry ;
2011-06-01 21:31:56 +02:00
final Iterator < Map . Entry < String , SearchEvent > > i = lastEvents . entrySet ( ) . iterator ( ) ;
2011-06-23 13:57:17 +02:00
SearchEvent event ;
2010-09-06 18:05:19 +02:00
while ( i . hasNext ( ) ) {
2011-06-23 13:57:17 +02:00
eventEntry = i . next ( ) ;
event = eventEntry . getValue ( ) ;
if ( event = = null ) continue ;
if ( all | | event . getEventTime ( ) + acceptTime < System . currentTimeMillis ( ) ) {
if ( event . workerAlive ( ) ) {
event . cleanup ( ) ;
2010-09-06 18:05:19 +02:00
}
}
2011-06-23 13:57:17 +02:00
if ( ! event . workerAlive ( ) ) {
i . remove ( ) ;
cacheDelete + + ;
}
2011-06-01 21:31:56 +02:00
}
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
2009-08-24 17:24:02 +02:00
public static SearchEvent getEvent ( final String eventID ) {
2011-06-01 21:31:56 +02:00
final SearchEvent event = lastEvents . get ( eventID ) ;
2010-09-02 23:52:45 +02:00
if ( event = = null ) cacheMiss + + ; else cacheHit + + ;
return event ;
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
public static int countAliveThreads ( ) {
int alive = 0 ;
for ( final SearchEvent e : SearchEventCache . lastEvents . values ( ) ) {
2011-06-23 13:57:17 +02:00
if ( e . workerAlive ( ) ) alive + + ;
2011-06-01 21:31:56 +02:00
}
return alive ;
}
private static SearchEvent dummyEvent = null ;
private static SearchEvent getDummyEvent ( final WorkTables workTables , final LoaderDispatcher loader , final Segment indexSegment ) {
2011-07-14 05:25:57 +02:00
Log . logWarning ( " SearchEventCache " , " returning dummy event " ) ;
2011-06-01 21:31:56 +02:00
if ( dummyEvent ! = null ) return dummyEvent ;
final QueryParams query = new QueryParams ( " " , 0 , null , indexSegment , new RankingProfile ( ContentDomain . TEXT ) , " " ) ;
dummyEvent = new SearchEvent ( query , null , workTables , null , false , loader , 0 , 0 , 0 , 0 , false ) ;
return dummyEvent ;
}
2009-08-24 17:24:02 +02:00
public static SearchEvent getEvent (
final QueryParams query ,
2011-10-04 11:06:24 +02:00
final SeedDB peers ,
2010-12-06 15:34:58 +01:00
final WorkTables workTables ,
2010-11-28 03:57:31 +01:00
final SortedMap < byte [ ] , String > preselectedPeerHashes ,
2010-03-20 11:28:03 +01:00
final boolean generateAbstracts ,
2011-02-13 18:37:28 +01:00
final LoaderDispatcher loader ,
2011-03-04 14:44:00 +01:00
final int remote_maxcount ,
final long remote_maxtime ,
2011-02-13 18:37:28 +01:00
final int burstRobinsonPercent ,
final int burstMultiwordPercent ) {
2011-06-01 21:31:56 +02:00
final String id = query . id ( false ) ;
2009-08-24 17:24:02 +02:00
SearchEvent event = SearchEventCache . lastEvents . get ( id ) ;
2010-09-02 23:52:45 +02:00
if ( event = = null ) cacheMiss + + ; else cacheHit + + ;
2009-12-02 01:37:59 +01:00
if ( Switchboard . getSwitchboard ( ) ! = null & & ! Switchboard . getSwitchboard ( ) . crawlQueues . noticeURL . isEmpty ( ) & & event ! = null & & System . currentTimeMillis ( ) - event . getEventTime ( ) > 60000 ) {
2009-08-24 17:24:02 +02:00
// if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
// to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
// once a minute
SearchEventCache . lastEvents . remove ( id ) ;
2010-09-02 23:52:45 +02:00
cacheDelete + + ;
2009-08-24 17:24:02 +02:00
event = null ;
} else {
if ( event ! = null ) {
//re-new the event time for this event, so it is not deleted next time too early
2009-08-26 17:59:55 +02:00
event . resetEventTime ( ) ;
2009-08-24 17:24:02 +02:00
// replace the query, because this contains the current result offset
2009-08-26 17:59:55 +02:00
event . setQuery ( query ) ;
2009-08-24 17:24:02 +02:00
}
}
if ( event = = null ) {
2011-06-01 21:31:56 +02:00
// throttling in case of too many search requests
int waitcount = 0 ;
throttling : while ( true ) {
2011-06-23 13:57:17 +02:00
final int allowedThreads = ( int ) Math . max ( 1 , MemoryControl . available ( ) / ( query . snippetCacheStrategy = = null ? 3 : 30 ) / 1024 / 1024 ) ;
2011-06-01 21:31:56 +02:00
// make room if there are too many search events (they need a lot of RAM)
2011-06-23 13:57:17 +02:00
if ( SearchEventCache . lastEvents . size ( ) > = allowedThreads ) {
2011-06-01 21:31:56 +02:00
Log . logWarning ( " SearchEventCache " , " throttling phase 1: " + SearchEventCache . lastEvents . size ( ) + " in cache; " + countAliveThreads ( ) + " alive; " + allowedThreads + " allowed " ) ;
cleanupEvents ( false ) ;
} else break throttling ;
// if there are still some then delete just all
2011-06-23 13:57:17 +02:00
if ( SearchEventCache . lastEvents . size ( ) > = allowedThreads ) {
2011-06-01 21:31:56 +02:00
Log . logWarning ( " SearchEventCache " , " throttling phase 2: " + SearchEventCache . lastEvents . size ( ) + " in cache; " + countAliveThreads ( ) + " alive; " + allowedThreads + " allowed " ) ;
cleanupEvents ( true ) ;
} else break throttling ;
// now there might be still events left that are alive
if ( countAliveThreads ( ) < allowedThreads ) break throttling ;
// finally we just wait some time until we get access
Log . logWarning ( " SearchEventCache " , " throttling phase 3: " + SearchEventCache . lastEvents . size ( ) + " in cache; " + countAliveThreads ( ) + " alive; " + allowedThreads + " allowed " ) ;
2011-12-21 00:32:03 +01:00
try { Thread . sleep ( 200 ) ; } catch ( final InterruptedException e ) { }
2011-06-01 21:31:56 +02:00
waitcount + + ;
2011-12-21 00:32:03 +01:00
if ( waitcount > = 100 ) return getDummyEvent ( workTables , loader , query . getSegment ( ) ) ;
2011-06-01 21:31:56 +02:00
}
2011-06-23 13:57:17 +02:00
if ( waitcount > 0 ) {
// do not fetch snippets because that is most time-expensive
query . snippetCacheStrategy = null ;
}
2011-06-01 21:31:56 +02:00
// check if there are too many other searches alive now
Log . logInfo ( " SearchEventCache " , " getEvent: " + SearchEventCache . lastEvents . size ( ) + " in cache; " + countAliveThreads ( ) + " alive " ) ;
2009-08-30 12:28:23 +02:00
// start a new event
2011-11-18 14:09:07 +01:00
final boolean delete = Switchboard . getSwitchboard ( ) = = null | | Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . SEARCH_VERIFY_DELETE , true ) ;
2011-03-21 08:50:34 +01:00
event = new SearchEvent ( query , peers , workTables , preselectedPeerHashes , generateAbstracts , loader , remote_maxcount , remote_maxtime , burstRobinsonPercent , burstMultiwordPercent , delete ) ;
2011-05-27 11:32:03 +02:00
MemoryControl . request ( 100 * 1024 * 1024 , false ) ; // this may trigger a short memory status which causes a reducing of cache space of other threads
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
2009-08-24 17:24:02 +02:00
return event ;
}
}