From ead39064c5c00bd3af83e3fbc020439f04fc6fac Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 3 Sep 2008 10:04:46 +0000 Subject: [PATCH] fixed problem with wrong result number calculation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5105 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/kelondro/kelondroTree.java | 3 ++ .../de/anomic/plasma/plasmaSearchEvent.java | 38 ++++++++++--------- source/de/anomic/yacy/yacySearch.java | 3 +- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index a023772b9..f11ff6732 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -151,6 +151,9 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex n.commit(); } + // the has method of kelondroTree should not be used, because it has the effect of doubling the IO activity whenever + // the result is 'true'. Whenever possible, please use the get method instead, store its result in a local variable and test + // that result by comparing it with null. 
public boolean has(final byte[] key) { boolean result; synchronized (writeSearchObj) { diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 441fec0bb..d4d28db7b 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -68,7 +68,7 @@ public final class plasmaSearchEvent { public static String lastEventID = ""; private static ConcurrentHashMap lastEvents = new ConcurrentHashMap(); // a cache for objects from this class: re-use old search requests public static final long eventLifetime = 600000; // the time an event will stay in the cache, 10 Minutes - private static final int max_results_preparation = 200; + private static final int max_results_preparation = 300; private long eventTime; plasmaSearchQuery query; @@ -240,8 +240,8 @@ public final class plasmaSearchEvent { serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "event-cleanup", 0, 0)); // store this search to a cache so it can be re-used - lastEvents.put(query.id(false), this); lastEventID = query.id(false); + lastEvents.put(lastEventID, this); } private class localSearchProcess extends Thread { @@ -460,24 +460,28 @@ public final class plasmaSearchEvent { final ResultURLs crawlResults, final TreeMap preselectedPeerHashes, final boolean generateAbstracts) { - plasmaSearchEvent event = lastEvents.get(query.id(false)); - if (event == null) { - event = new plasmaSearchEvent(query, wordIndex, crawlResults, preselectedPeerHashes, generateAbstracts); + + String id = query.id(false); + plasmaSearchEvent event = lastEvents.get(id); + if (plasmaSwitchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0 && event != null && System.currentTimeMillis() - event.eventTime > 60000) { + // if a local crawl is ongoing, drop the cached event so that the new search can include additional results coming from the current crawl; + // to avoid repeating the search while a person switches 
between the different result pages, a re-search happens no more than + // once a minute + lastEvents.remove(id); + event = null; } else { - //re-new the event time for this event, so it is not deleted next time too early - event.eventTime = System.currentTimeMillis(); - // replace the query, because this contains the current result offset - event.query = query; - } - - // if a local crawl is ongoing, do another local search to enrich the current results with more - // entries that can possibly come out of the running crawl - if (plasmaSwitchboard.getSwitchboard().crawlQueues.noticeURL.size() > 0) { - synchronized (event.rankedCache) { - event.rankedCache.execQuery(); + if (event != null) { + //re-new the event time for this event, so it is not deleted next time too early + event.eventTime = System.currentTimeMillis(); + // replace the query, because this contains the current result offset + event.query = query; } } - + if (event == null) { + // generate a new event + event = new plasmaSearchEvent(query, wordIndex, crawlResults, preselectedPeerHashes, generateAbstracts); + } + // if worker threads had been alive, but did not succeed, start them again to fetch missing links if ((query.onlineSnippetFetch) && (!event.anyWorkerAlive()) && diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index bbebef639..4fde7ae5b 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -296,6 +296,7 @@ public class yacySearch extends Thread { if (targets == 0) return new yacySearch[0]; final yacySearch[] searchThreads = new yacySearch[targets]; for (int i = 0; i < targets; i++) { + if (targetPeers[i] == null || targetPeers[i].hash == null) continue; searchThreads[i] = new yacySearch(wordhashes, excludehashes, urlhashes, prefer, filter, count, maxDist, true, targets, targetPeers[i], wordIndex, crawlResults, containerCache, abstractCache, blacklist, rankingProfile, constraint); searchThreads[i].start(); @@ 
-316,7 +317,7 @@ public class yacySearch extends Thread { // prepare seed targets and threads final yacySeed targetPeer = wordIndex.seedDB.getConnected(targethash); - if (targetPeer == null) return null; + if (targetPeer == null || targetPeer.hash == null) return null; if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(targetPeer.hash)); final yacySearch searchThread = new yacySearch(wordhashes, excludehashes, urlhashes, "", "", 0, 9999, true, 0, targetPeer, wordIndex, crawlResults, containerCache, new TreeMap>(), blacklist, rankingProfile, constraint);