yacy_search_server/source/net/yacy/search/query/SearchEventCache.java
reger d44d8996d0 Added a “don't store remote search results” option
This is intended for peers who want to participate in the P2P network but don't wish to load/fill-up their index with metadata of every received search result. 
The DHT transfer is not effected by this option (and will work as usual, so that a peer disabling the new store to index switch still receives and holds the metadata according to DHT rules).
Downside for the local peer is that search speed will not improve if search terms are only avail. remote or by quick hits in local index.

To be able to improve the local index a Click-Servlet option was added additionally.
If switched on, all search result links point to this servlet, which forwards the users browser (by html header) to the desired page and feeds the page to the fulltext-index.
The servlet accepts a parameter defining the action to perform (see defaults/web.xml, index, crawl, crawllinks)

The option check-boxes are placed in ConfigPortal.html
2015-01-04 11:10:45 +01:00

182 lines
8.0 KiB
Java

// SearchEventCache.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.search.query;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.WorkTables;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.SeedDB;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
public class SearchEventCache {
private volatile static LinkedHashMap<String, SearchEvent> lastEvents = new LinkedHashMap<String, SearchEvent>(); // a cache for objects from this class: re-use old search requests
private static final long eventLifetimeBigMem = 600000; // the time an event will stay in the cache when available memory is high, 10 Minutes
private static final long eventLifetimeMediumMem = 60000; // the time an event will stay in the cache when available memory is medium, 1 Minute
private static final long eventLifetimeShortMem = 10000; // the time an event will stay in the cache when memory is low, 10 seconds
private static final long memlimitHigh = 600 * 1024 * 1024; // 400 MB
private static final long memlimitMedium = 200 * 1024 * 1024; // 100 MB
public volatile static String lastEventID = "";
public static long cacheInsert = 0, cacheHit = 0, cacheMiss = 0, cacheDelete = 0;
public static int size() {
return lastEvents.size();
}
protected static void put(final String eventID, final SearchEvent event) {
if (MemoryControl.shortStatus()) cleanupEvents(false);
lastEventID = eventID;
synchronized (lastEvents) {
final SearchEvent oldEvent = lastEvents.put(eventID, event);
if (oldEvent == null) cacheInsert++;
}
}
public static boolean delete(final String urlhash) {
synchronized (lastEvents) {
for (final SearchEvent event: lastEvents.values()) {
if (event.delete(urlhash)) return true;
}
}
return false;
}
public static void cleanupEvents(boolean all) {
// remove old events in the event cache
if (MemoryControl.shortStatus()) all = true;
// the less memory is there, the less time is acceptable for elements in the cache
final long memx = MemoryControl.available();
final long acceptTime = memx > memlimitHigh ? eventLifetimeBigMem : memx > memlimitMedium ? eventLifetimeMediumMem : eventLifetimeShortMem;
Map.Entry<String, SearchEvent> eventEntry;
synchronized (lastEvents) {
final Iterator<Map.Entry<String, SearchEvent>> i = lastEvents.entrySet().iterator();
SearchEvent event;
while (i.hasNext()) {
eventEntry = i.next();
event = eventEntry.getValue();
if (event == null) continue;
if (all || event.getEventTime() + acceptTime < System.currentTimeMillis()) {
event.cleanup();
i.remove();
cacheDelete++;
}
}
}
}
public static void cleanupEvents(int maxsize) {
// remove old events in the event cache
if (MemoryControl.shortStatus()) {cleanupEvents(true); return;}
Map.Entry<String, SearchEvent> eventEntry;
synchronized (lastEvents) {
final Iterator<Map.Entry<String, SearchEvent>> i = lastEvents.entrySet().iterator(); // iterates in order of entry
int dc = lastEvents.size() - maxsize;
if (dc <= 0) return;
SearchEvent event;
while (i.hasNext()) {
eventEntry = i.next();
event = eventEntry.getValue();
if (event == null) continue;
event.cleanup();
i.remove();
cacheDelete++;
dc--;
if (dc <= 0) break;
}
}
}
public static SearchEvent getEvent(final String eventID) {
SearchEvent event = lastEvents.get(eventID);
if (event == null) {
synchronized (lastEvents) {
event = lastEvents.get(eventID);
if (event == null) cacheMiss++; else cacheHit++;
}
cacheMiss++;
} else {
cacheHit++;
}
return event;
}
public static SearchEvent getEvent(
final QueryParams query,
final SeedDB peers,
final WorkTables workTables,
final SortedSet<byte[]> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader,
final int remote_maxcount,
final long remote_maxtime) {
if (MemoryControl.shortStatus()) cleanupEvents(true);
final String id = query.id(false);
SearchEvent event = getEvent(id);
if (Switchboard.getSwitchboard() != null && !Switchboard.getSwitchboard().crawlQueues.noticeURL.isEmpty() && event != null && System.currentTimeMillis() - event.getEventTime() > 60000) {
// if a local crawl is ongoing, don't use the result from the cache to use possibly more results that come from the current crawl
// to prevent that this happens during a person switches between the different result pages, a re-search happens no more than
// once a minute
synchronized (lastEvents) {
lastEvents.remove(id);
}
cacheDelete++;
event = null;
} else {
if (event != null) {
//re-new the event time for this event, so it is not deleted next time too early
event.resetEventTime();
// replace the current result offset
event.query.offset = query.offset;
event.query.itemsPerPage = query.itemsPerPage;
}
}
if (event == null) {
// check if there are too many other searches alive now
synchronized (lastEvents) {
ConcurrentLog.info("SearchEventCache", "getEvent: " + lastEvents.size() + " in cache");
}
// start a new event
Switchboard sb = Switchboard.getSwitchboard();
final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);
final boolean addToLocalIdx = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.REMOTESEARCH_RESULT_STORE, true);
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, delete, addToLocalIdx);
MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads
}
return event;
}
}