Fix for a search bug that appeared when viewing page 3 or later of the search results

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6515 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-12-03 12:25:03 +00:00
parent 29fde9ed49
commit 4782d2c438
6 changed files with 28 additions and 21 deletions

View File

@ -55,6 +55,7 @@ import de.anomic.data.listManager;
import de.anomic.http.server.RequestHeader; import de.anomic.http.server.RequestHeader;
import de.anomic.search.QueryParams; import de.anomic.search.QueryParams;
import de.anomic.search.RankingProcess; import de.anomic.search.RankingProcess;
import de.anomic.search.ReferenceOrder;
import de.anomic.search.SearchEventCache; import de.anomic.search.SearchEventCache;
import de.anomic.search.Segment; import de.anomic.search.Segment;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
@ -407,7 +408,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash())); prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified()))); prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext()); prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
@ -503,7 +504,8 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) { public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking()); final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1); final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
ranked.run(); ranked.run();
if (ranked.filteredCount() == 0) { if (ranked.filteredCount() == 0) {

View File

@ -195,12 +195,13 @@ public class DocumentIndex extends Segment {
final String querystring, final String querystring,
final Segment indexSegment) { final Segment indexSegment) {
QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault); QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
return findMetadata(query); ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
return findMetadata(query, order);
} }
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query) { public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {
RankingProcess rankedCache = new RankingProcess(query, 1000, 2); RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
rankedCache.run(); rankedCache.run();
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>(); ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();

View File

@ -76,7 +76,6 @@ public final class QueryParams {
public boolean onlineSnippetFetch; public boolean onlineSnippetFetch;
public RankingProfile ranking; public RankingProfile ranking;
private Segment indexSegment; private Segment indexSegment;
private final ReferenceOrder order;
public String host; // this is the client host that starts the query, not a site operator public String host; // this is the client host that starts the query, not a site operator
public String sitehash; // this is a domain hash, 6 bytes long or null public String sitehash; // this is a domain hash, 6 bytes long or null
public String authorhash; public String authorhash;
@ -127,7 +126,6 @@ public final class QueryParams {
this.handle = Long.valueOf(System.currentTimeMillis()); this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false; this.specialRights = false;
this.navigators = "all"; this.navigators = "all";
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment; this.indexSegment = indexSegment;
} }
@ -177,14 +175,9 @@ public final class QueryParams {
this.remotepeer = null; this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis()); this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = specialRights; this.specialRights = specialRights;
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment; this.indexSegment = indexSegment;
} }
public ReferenceOrder getOrder() {
return this.order;
}
public Segment getSegment() { public Segment getSegment() {
return this.indexSegment; return this.indexSegment;
} }

View File

@ -82,9 +82,9 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
private final ConcurrentHashMap<String, HostInfo> hostNavigator; private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator; private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
private final ReferenceOrder order;
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects // we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking // sortorder: 0 = hash, 1 = url, 2 = ranking
@ -93,6 +93,7 @@ public final class RankingProcess extends Thread {
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>(); this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>(); this.handover = new HashSet<String>();
this.query = query; this.query = query;
this.order = order;
this.maxentries = maxentries; this.maxentries = maxentries;
this.remote_peerCount = 0; this.remote_peerCount = 0;
this.remote_indexCount = 0; this.remote_indexCount = 0;
@ -115,6 +116,10 @@ public final class RankingProcess extends Thread {
return this.query; return this.query;
} }
public ReferenceOrder getOrder() {
return this.order;
}
public void run() { public void run() {
// do a search // do a search
@ -158,7 +163,7 @@ public final class RankingProcess extends Thread {
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
// normalize entries // normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.query.getOrder().normalizeWith(index); final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false); MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
// iterate over normalized entries and select some that are better than currently stored // iterate over normalized entries and select some that are better than currently stored
@ -232,7 +237,7 @@ public final class RankingProcess extends Thread {
for (WordReferenceVars fEntry: filteredEntries) { for (WordReferenceVars fEntry: filteredEntries) {
// kick out entries that are too bad according to current findings // kick out entries that are too bad according to current findings
r = Long.valueOf(this.query.getOrder().cardinal(fEntry)); r = Long.valueOf(this.order.cardinal(fEntry));
assert maxentries != 0; assert maxentries != 0;
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue; if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;

View File

@ -173,7 +173,7 @@ public class ResultFetcher {
// place the result to the result vector // place the result to the result vector
// apply post-ranking // apply post-ranking
long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word())); long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics()); ranking += postRanking(resultEntry, rankedCache.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash()); //System.out.println("*** resultEntry.hash = " + resultEntry.hash());
result.push(resultEntry, ranking); result.push(resultEntry, ranking);

View File

@ -79,6 +79,7 @@ public final class SearchEvent {
private TreeMap<byte[], String> IAResults; private TreeMap<byte[], String> IAResults;
private TreeMap<byte[], Integer> IACount; private TreeMap<byte[], Integer> IACount;
private byte[] IAmaxcounthash, IAneardhthash; private byte[] IAmaxcounthash, IAneardhthash;
private final ReferenceOrder order;
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query, @SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
final yacySeedDB peers, final yacySeedDB peers,
@ -98,6 +99,7 @@ public final class SearchEvent {
this.IAmaxcounthash = null; this.IAmaxcounthash = null;
this.IAneardhthash = null; this.IAneardhthash = null;
this.localSearchThread = null; this.localSearchThread = null;
this.order = new ReferenceOrder(query.ranking, query.targetlang);
final long start = System.currentTimeMillis(); final long start = System.currentTimeMillis();
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
@ -106,7 +108,7 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data // initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads // that is generated concurrently from local and global search threads
this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1); this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1);
// start a local search concurrently // start a local search concurrently
this.rankedCache.start(); this.rankedCache.start();
@ -149,7 +151,7 @@ public final class SearchEvent {
this.results = new ResultFetcher(rankedCache, query, peers, 10000); this.results = new ResultFetcher(rankedCache, query, peers, 10000);
} else { } else {
// do a local search // do a local search
this.rankedCache = new RankingProcess(query, max_results_preparation, 2); this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
this.rankedCache.run(); this.rankedCache.run();
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process); //CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
@ -191,8 +193,12 @@ public final class SearchEvent {
// store this search to a cache so it can be re-used // store this search to a cache so it can be re-used
if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true); if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
SearchEventCache.put(query.id(false), this); SearchEventCache.put(query.id(false), this);
} }
public ReferenceOrder getOrder() {
return this.order;
}
public long getEventTime() { public long getEventTime() {
return this.eventTime; return this.eventTime;
} }