Fix for a search bug that appeared when viewing page 3 of the search results or any later page

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6515 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-12-03 12:25:03 +00:00
parent 29fde9ed49
commit 4782d2c438
6 changed files with 28 additions and 21 deletions

View File

@ -55,6 +55,7 @@ import de.anomic.data.listManager;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProcess;
import de.anomic.search.ReferenceOrder;
import de.anomic.search.SearchEventCache;
import de.anomic.search.Segment;
import de.anomic.search.Switchboard;
@ -407,7 +408,7 @@ public class IndexControlRWIs_p {
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash()));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
@ -503,7 +504,8 @@ public class IndexControlRWIs_p {
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
ranked.run();
if (ranked.filteredCount() == 0) {

View File

@ -195,12 +195,13 @@ public class DocumentIndex extends Segment {
final String querystring,
final Segment indexSegment) {
QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
return findMetadata(query);
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
return findMetadata(query, order);
}
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query) {
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {
RankingProcess rankedCache = new RankingProcess(query, 1000, 2);
RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
rankedCache.run();
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();

View File

@ -76,7 +76,6 @@ public final class QueryParams {
public boolean onlineSnippetFetch;
public RankingProfile ranking;
private Segment indexSegment;
private final ReferenceOrder order;
public String host; // this is the client host that starts the query, not a site operator
public String sitehash; // this is a domain hash, 6 bytes long or null
public String authorhash;
@ -127,7 +126,6 @@ public final class QueryParams {
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false;
this.navigators = "all";
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment;
}
@ -177,14 +175,9 @@ public final class QueryParams {
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = specialRights;
this.order = new ReferenceOrder(this.ranking, this.targetlang);
this.indexSegment = indexSegment;
}
public ReferenceOrder getOrder() {
return this.order;
}
public Segment getSegment() {
return this.indexSegment;
}

View File

@ -82,9 +82,9 @@ public final class RankingProcess extends Thread {
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
private final ReferenceOrder order;
public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
@ -93,6 +93,7 @@ public final class RankingProcess extends Thread {
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>();
this.query = query;
this.order = order;
this.maxentries = maxentries;
this.remote_peerCount = 0;
this.remote_indexCount = 0;
@ -115,6 +116,10 @@ public final class RankingProcess extends Thread {
return this.query;
}
public ReferenceOrder getOrder() {
return this.order;
}
public void run() {
// do a search
@ -158,7 +163,7 @@ public final class RankingProcess extends Thread {
long timer = System.currentTimeMillis();
// normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.query.getOrder().normalizeWith(index);
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index);
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
// iterate over normalized entries and select some that are better than currently stored
@ -232,7 +237,7 @@ public final class RankingProcess extends Thread {
for (WordReferenceVars fEntry: filteredEntries) {
// kick out entries that are too bad according to current findings
r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
r = Long.valueOf(this.order.cardinal(fEntry));
assert maxentries != 0;
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;

View File

@ -173,7 +173,7 @@ public class ResultFetcher {
// place the result to the result vector
// apply post-ranking
long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word()));
long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
ranking += postRanking(resultEntry, rankedCache.getTopics());
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
result.push(resultEntry, ranking);

View File

@ -79,6 +79,7 @@ public final class SearchEvent {
private TreeMap<byte[], String> IAResults;
private TreeMap<byte[], Integer> IACount;
private byte[] IAmaxcounthash, IAneardhthash;
private final ReferenceOrder order;
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
final yacySeedDB peers,
@ -98,6 +99,7 @@ public final class SearchEvent {
this.IAmaxcounthash = null;
this.IAneardhthash = null;
this.localSearchThread = null;
this.order = new ReferenceOrder(query.ranking, query.targetlang);
final long start = System.currentTimeMillis();
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
@ -106,7 +108,7 @@ public final class SearchEvent {
// initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads
this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1);
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1);
// start a local search concurrently
this.rankedCache.start();
@ -149,7 +151,7 @@ public final class SearchEvent {
this.results = new ResultFetcher(rankedCache, query, peers, 10000);
} else {
// do a local search
this.rankedCache = new RankingProcess(query, max_results_preparation, 2);
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
this.rankedCache.run();
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
@ -191,8 +193,12 @@ public final class SearchEvent {
// store this search to a cache so it can be re-used
if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
SearchEventCache.put(query.id(false), this);
}
}
public ReferenceOrder getOrder() {
return this.order;
}
public long getEventTime() {
return this.eventTime;
}