// SearchEventCache.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.query ;
2009-08-24 17:24:02 +02:00
2010-09-06 18:05:19 +02:00
import java.util.Iterator ;
2013-05-30 12:38:15 +02:00
import java.util.LinkedHashMap ;
2010-09-02 23:52:45 +02:00
import java.util.Map ;
2014-09-30 14:53:52 +02:00
import java.util.SortedSet ;
2009-08-24 17:24:02 +02:00
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2012-09-21 15:48:16 +02:00
import net.yacy.data.WorkTables ;
2010-09-02 23:52:45 +02:00
import net.yacy.kelondro.util.MemoryControl ;
2011-10-04 11:06:24 +02:00
import net.yacy.peers.SeedDB ;
2010-03-20 11:28:03 +01:00
import net.yacy.repository.LoaderDispatcher ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
2011-11-18 14:09:07 +01:00
import net.yacy.search.SwitchboardConstants ;
2009-08-24 17:24:02 +02:00
public class SearchEventCache {
2013-05-30 12:38:15 +02:00
private volatile static LinkedHashMap < String , SearchEvent > lastEvents = new LinkedHashMap < String , SearchEvent > ( ) ; // a cache for objects from this class: re-use old search requests
2012-11-01 17:16:43 +01:00
private static final long eventLifetimeBigMem = 600000 ; // the time an event will stay in the cache when available memory is high, 10 Minutes
private static final long eventLifetimeMediumMem = 60000 ; // the time an event will stay in the cache when available memory is medium, 1 Minute
private static final long eventLifetimeShortMem = 10000 ; // the time an event will stay in the cache when memory is low, 10 seconds
private static final long memlimitHigh = 600 * 1024 * 1024 ; // 400 MB
private static final long memlimitMedium = 200 * 1024 * 1024 ; // 100 MB
2012-06-05 12:06:26 +02:00
public volatile static String lastEventID = " " ;
2010-09-02 23:52:45 +02:00
public static long cacheInsert = 0 , cacheHit = 0 , cacheMiss = 0 , cacheDelete = 0 ;
2011-06-01 21:31:56 +02:00
2010-09-02 23:52:45 +02:00
public static int size ( ) {
return lastEvents . size ( ) ;
}
2011-06-01 21:31:56 +02:00
2012-11-01 17:16:43 +01:00
protected static void put ( final String eventID , final SearchEvent event ) {
2012-06-05 12:06:26 +02:00
if ( MemoryControl . shortStatus ( ) ) cleanupEvents ( false ) ;
2009-08-26 17:59:55 +02:00
lastEventID = eventID ;
2013-05-30 12:38:15 +02:00
synchronized ( lastEvents ) {
final SearchEvent oldEvent = lastEvents . put ( eventID , event ) ;
if ( oldEvent = = null ) cacheInsert + + ;
}
2009-08-26 17:59:55 +02:00
}
2011-06-01 21:31:56 +02:00
2011-11-24 17:05:09 +01:00
public static boolean delete ( final String urlhash ) {
2013-05-30 12:38:15 +02:00
synchronized ( lastEvents ) {
for ( final SearchEvent event : lastEvents . values ( ) ) {
if ( event . delete ( urlhash ) ) return true ;
}
2011-11-24 17:05:09 +01:00
}
return false ;
}
2011-05-27 16:26:37 +02:00
public static void cleanupEvents ( boolean all ) {
2009-08-24 17:24:02 +02:00
// remove old events in the event cache
2011-05-27 16:26:37 +02:00
if ( MemoryControl . shortStatus ( ) ) all = true ;
2010-09-02 23:52:45 +02:00
// the less memory is there, the less time is acceptable for elements in the cache
2011-06-01 21:31:56 +02:00
final long memx = MemoryControl . available ( ) ;
final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem ;
2011-06-23 13:57:17 +02:00
Map . Entry < String , SearchEvent > eventEntry ;
2013-05-30 12:38:15 +02:00
synchronized ( lastEvents ) {
final Iterator < Map . Entry < String , SearchEvent > > i = lastEvents . entrySet ( ) . iterator ( ) ;
SearchEvent event ;
while ( i . hasNext ( ) ) {
eventEntry = i . next ( ) ;
event = eventEntry . getValue ( ) ;
if ( event = = null ) continue ;
if ( all | | event . getEventTime ( ) + acceptTime < System . currentTimeMillis ( ) ) {
event . cleanup ( ) ;
i . remove ( ) ;
cacheDelete + + ;
}
}
}
}
public static void cleanupEvents ( int maxsize ) {
// remove old events in the event cache
if ( MemoryControl . shortStatus ( ) ) { cleanupEvents ( true ) ; return ; }
Map . Entry < String , SearchEvent > eventEntry ;
synchronized ( lastEvents ) {
final Iterator < Map . Entry < String , SearchEvent > > i = lastEvents . entrySet ( ) . iterator ( ) ; // iterates in order of entry
int dc = lastEvents . size ( ) - maxsize ;
if ( dc < = 0 ) return ;
SearchEvent event ;
while ( i . hasNext ( ) ) {
eventEntry = i . next ( ) ;
event = eventEntry . getValue ( ) ;
if ( event = = null ) continue ;
2013-02-26 17:16:31 +01:00
event . cleanup ( ) ;
2011-06-23 13:57:17 +02:00
i . remove ( ) ;
cacheDelete + + ;
2013-05-30 12:38:15 +02:00
dc - - ;
if ( dc < = 0 ) break ;
2011-06-23 13:57:17 +02:00
}
2011-06-01 21:31:56 +02:00
}
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
2009-08-24 17:24:02 +02:00
public static SearchEvent getEvent ( final String eventID ) {
2012-06-05 12:06:26 +02:00
SearchEvent event = lastEvents . get ( eventID ) ;
if ( event = = null ) {
synchronized ( lastEvents ) {
event = lastEvents . get ( eventID ) ;
if ( event = = null ) cacheMiss + + ; else cacheHit + + ;
}
cacheMiss + + ;
} else {
cacheHit + + ;
}
2010-09-02 23:52:45 +02:00
return event ;
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
2009-08-24 17:24:02 +02:00
public static SearchEvent getEvent (
final QueryParams query ,
2011-10-04 11:06:24 +02:00
final SeedDB peers ,
2010-12-06 15:34:58 +01:00
final WorkTables workTables ,
2014-09-30 14:53:52 +02:00
final SortedSet < byte [ ] > preselectedPeerHashes ,
2010-03-20 11:28:03 +01:00
final boolean generateAbstracts ,
2011-02-13 18:37:28 +01:00
final LoaderDispatcher loader ,
2011-03-04 14:44:00 +01:00
final int remote_maxcount ,
2014-01-16 17:27:14 +01:00
final long remote_maxtime ) {
2011-06-01 21:31:56 +02:00
2013-03-04 13:01:24 +01:00
if ( MemoryControl . shortStatus ( ) ) cleanupEvents ( true ) ;
2011-06-01 21:31:56 +02:00
final String id = query . id ( false ) ;
2012-06-05 12:06:26 +02:00
SearchEvent event = getEvent ( id ) ;
2009-12-02 01:37:59 +01:00
if ( Switchboard . getSwitchboard ( ) ! = null & & ! Switchboard . getSwitchboard ( ) . crawlQueues . noticeURL . isEmpty ( ) & & event ! = null & & System . currentTimeMillis ( ) - event . getEventTime ( ) > 60000 ) {
2009-08-24 17:24:02 +02:00
// if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
// to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
// once a minute
2013-05-30 12:38:15 +02:00
synchronized ( lastEvents ) {
lastEvents . remove ( id ) ;
}
2010-09-02 23:52:45 +02:00
cacheDelete + + ;
2009-08-24 17:24:02 +02:00
event = null ;
} else {
if ( event ! = null ) {
2016-12-20 14:52:33 +01:00
if ( query . isLocal ( ) ) {
/ * Searching the local index only : we do not reuse the cached event each time the page size or offset changes .
* This allow to request last result pages of large result sets ( larger than SearchEvent . max_results_node )
* without the need to retrieve all the beginning pages * /
if ( event . query . offset ! = query . offset | | event . query . itemsPerPage ! = query . itemsPerPage ) {
synchronized ( lastEvents ) {
lastEvents . remove ( id ) ;
}
cacheDelete + + ;
event = null ;
}
} else {
//re-new the event time for this event, so it is not deleted next time too early
event . resetEventTime ( ) ;
// replace the current result offset
event . query . offset = query . offset ;
event . query . itemsPerPage = query . itemsPerPage ;
}
2009-08-24 17:24:02 +02:00
}
}
if ( event = = null ) {
2011-06-01 21:31:56 +02:00
// check if there are too many other searches alive now
2013-05-30 12:38:15 +02:00
synchronized ( lastEvents ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . info ( " SearchEventCache " , " getEvent: " + lastEvents . size ( ) + " in cache " ) ;
2013-05-30 12:38:15 +02:00
}
2009-08-30 12:28:23 +02:00
// start a new event
2012-12-02 16:54:29 +01:00
Switchboard sb = Switchboard . getSwitchboard ( ) ;
2015-12-13 20:01:49 +01:00
final boolean delete = sb = = null | | Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . SEARCH_VERIFY_DELETE , true )
| | ( sb . getConfigBool ( SwitchboardConstants . NETWORK_SEARCHVERIFY , false ) & & sb . peers . mySeed ( ) . getFlagAcceptRemoteIndex ( ) ) ;
2015-01-04 11:10:45 +01:00
final boolean addToLocalIdx = sb = = null | | Switchboard . getSwitchboard ( ) . getConfigBool ( SwitchboardConstants . REMOTESEARCH_RESULT_STORE , true ) ;
event = new SearchEvent ( query , peers , workTables , preselectedPeerHashes , generateAbstracts , loader , remote_maxcount , remote_maxtime , delete , addToLocalIdx ) ;
2015-12-16 02:20:03 +01:00
/* Optional config option may be valued to limit size of remote documents added to local index */
if ( sb ! = null ) {
event . setRemoteDocStoredMaxSize ( sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_RESULT_STORE_MAXSIZE , - 1 ) ) ;
}
2011-05-27 11:32:03 +02:00
MemoryControl . request ( 100 * 1024 * 1024 , false ) ; // this may trigger a short memory status which causes a reducing of cache space of other threads
2009-08-24 17:24:02 +02:00
}
2011-06-01 21:31:56 +02:00
2009-08-24 17:24:02 +02:00
return event ;
}
}