From 4782d2c4388bc9c65f7863112827b28338da796f Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 3 Dec 2009 12:25:03 +0000
Subject: [PATCH] fix for search bug that appeared when looking at page 3 of results or further

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6515 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/IndexControlRWIs_p.java              |  6 ++++--
 source/de/anomic/search/DocumentIndex.java  |  7 ++++---
 source/de/anomic/search/QueryParams.java    |  7 -------
 source/de/anomic/search/RankingProcess.java | 13 +++++++++----
 source/de/anomic/search/ResultFetcher.java  |  2 +-
 source/de/anomic/search/SearchEvent.java    | 14 ++++++++++----
 6 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
index b24e2028c..97d9ea8a5 100644
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@@ -55,6 +55,7 @@ import de.anomic.data.listManager;
 import de.anomic.http.server.RequestHeader;
 import de.anomic.search.QueryParams;
 import de.anomic.search.RankingProcess;
+import de.anomic.search.ReferenceOrder;
 import de.anomic.search.SearchEventCache;
 import de.anomic.search.Segment;
 import de.anomic.search.Switchboard;
@@ -407,7 +408,7 @@ public class IndexControlRWIs_p {
                 prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
                 prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
                 prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
-                prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash()));
+                prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
                 prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
                 prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
                 prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
@@ -503,7 +504,8 @@ public class IndexControlRWIs_p {
 
     public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
         final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
-        final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
+        final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
+        final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
         ranked.run();
 
         if (ranked.filteredCount() == 0) {
diff --git a/source/de/anomic/search/DocumentIndex.java b/source/de/anomic/search/DocumentIndex.java
index f2d31d977..1eb92d141 100644
--- a/source/de/anomic/search/DocumentIndex.java
+++ b/source/de/anomic/search/DocumentIndex.java
@@ -195,12 +195,13 @@ public class DocumentIndex extends Segment {
             final String querystring,
             final Segment indexSegment) {
         QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
-        return findMetadata(query);
+        ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
+        return findMetadata(query, order);
     }
 
-    public static final ArrayList findMetadata(final QueryParams query) {
+    public static final ArrayList findMetadata(final QueryParams query, final ReferenceOrder order) {
 
-        RankingProcess rankedCache = new RankingProcess(query, 1000, 2);
+        RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
         rankedCache.run();
 
         ArrayList result = new ArrayList();
diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java
index 5f181bd08..4d4de5ae3 100644
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -76,7 +76,6 @@ public final class QueryParams {
     public boolean onlineSnippetFetch;
     public RankingProfile ranking;
     private Segment indexSegment;
-    private final ReferenceOrder order;
     public String host; // this is the client host that starts the query, not a site operator
     public String sitehash; // this is a domain hash, 6 bytes long or null
     public String authorhash;
@@ -127,7 +126,6 @@ public final class QueryParams {
         this.handle = Long.valueOf(System.currentTimeMillis());
         this.specialRights = false;
         this.navigators = "all";
-        this.order = new ReferenceOrder(this.ranking, this.targetlang);
         this.indexSegment = indexSegment;
     }
 
@@ -177,14 +175,9 @@ public final class QueryParams {
         this.remotepeer = null;
         this.handle = Long.valueOf(System.currentTimeMillis());
         this.specialRights = specialRights;
-        this.order = new ReferenceOrder(this.ranking, this.targetlang);
         this.indexSegment = indexSegment;
     }
 
-    public ReferenceOrder getOrder() {
-        return this.order;
-    }
-
     public Segment getSegment() {
         return this.indexSegment;
     }
diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java
index a17dc6463..93116d688 100644
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@@ -82,9 +82,9 @@ public final class RankingProcess extends Thread {
     private final ConcurrentHashMap ref; // reference score computation for the commonSense heuristic
     private final ConcurrentHashMap hostNavigator;
     private final ConcurrentHashMap authorNavigator;
+    private final ReferenceOrder order;
 
-
-    public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
+    public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not terminate within the maxTime
         // sortorder: 0 = hash, 1 = url, 2 = ranking
@@ -93,6 +93,7 @@
         this.doubleDomCache = new HashMap>();
         this.handover = new HashSet();
         this.query = query;
+        this.order = order;
         this.maxentries = maxentries;
         this.remote_peerCount = 0;
         this.remote_indexCount = 0;
@@ -115,6 +116,10 @@
         return this.query;
     }
 
+    public ReferenceOrder getOrder() {
+        return this.order;
+    }
+
     public void run() {
         // do a search
 
@@ -158,7 +163,7 @@
         long timer = System.currentTimeMillis();
 
         // normalize entries
-        final BlockingQueue decodedEntries = this.query.getOrder().normalizeWith(index);
+        final BlockingQueue decodedEntries = this.order.normalizeWith(index);
         MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
 
         // iterate over normalized entries and select some that are better than currently stored
@@ -232,7 +237,7 @@
             for (WordReferenceVars fEntry: filteredEntries) {
 
                 // kick out entries that are too bad according to current findings
-                r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
+                r = Long.valueOf(this.order.cardinal(fEntry));
                 assert maxentries != 0;
                 if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
 
diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java
index 62623868d..15f2af0c4 100644
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@@ -173,7 +173,7 @@ public class ResultFetcher {
 
                 // place the result to the result vector
                 // apply post-ranking
-                long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word()));
+                long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
                 ranking += postRanking(resultEntry, rankedCache.getTopics());
                 //System.out.println("*** resultEntry.hash = " + resultEntry.hash());
                 result.push(resultEntry, ranking);
diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java
index 7047e7293..97504dc10 100644
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@@ -79,6 +79,7 @@ public final class SearchEvent {
     private TreeMap IAResults;
     private TreeMap IACount;
     private byte[] IAmaxcounthash, IAneardhthash;
+    private final ReferenceOrder order;
     @SuppressWarnings("unchecked")
     SearchEvent(final QueryParams query,
                 final yacySeedDB peers,
@@ -98,6 +99,7 @@
         this.IAmaxcounthash = null;
         this.IAneardhthash = null;
         this.localSearchThread = null;
+        this.order = new ReferenceOrder(query.ranking, query.targetlang);
 
         final long start = System.currentTimeMillis();
         if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
@@ -106,7 +108,7 @@
 
             // initialize a ranking process that is the target for data
             // that is generated concurrently from local and global search threads
-            this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1);
+            this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1);
 
             // start a local search concurrently
             this.rankedCache.start();
@@ -149,7 +151,7 @@
             this.results = new ResultFetcher(rankedCache, query, peers, 10000);
         } else {
             // do a local search
-            this.rankedCache = new RankingProcess(query, max_results_preparation, 2);
+            this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
             this.rankedCache.run();
 
             //CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
@@ -191,8 +193,12 @@
         // store this search to a cache so it can be re-used
         if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
         SearchEventCache.put(query.id(false), this);
-    }
-
+    }
+
+    public ReferenceOrder getOrder() {
+        return this.order;
+    }
+
     public long getEventTime() {
         return this.eventTime;
     }
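
For reference, the call pattern after this patch is sketched below (a minimal sketch, not part of the patch itself). The classes, constructors, and getOrder() accessor are taken from the diff above, as exercised in IndexControlRWIs_p.genSearchresult; the surrounding variables (keyhash, filter, segment, sb) are assumed from that caller's context:

    // Sketch of the refactored usage: the caller builds the ReferenceOrder once
    // and hands it to the RankingProcess, instead of it being created inside
    // QueryParams as before this patch.
    final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
    final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
    final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
    ranked.run();
    // Downstream code now reads the order from the process (or from SearchEvent),
    // e.g. ranked.getOrder().cardinal(...) or ranked.getOrder().authority(...),
    // rather than from QueryParams.getOrder(), which this patch removes.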