mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Fix for a search bug that appeared when viewing page 3 or later of the search results
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6515 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
29fde9ed49
commit
4782d2c438
|
@ -55,6 +55,7 @@ import de.anomic.data.listManager;
|
||||||
import de.anomic.http.server.RequestHeader;
|
import de.anomic.http.server.RequestHeader;
|
||||||
import de.anomic.search.QueryParams;
|
import de.anomic.search.QueryParams;
|
||||||
import de.anomic.search.RankingProcess;
|
import de.anomic.search.RankingProcess;
|
||||||
|
import de.anomic.search.ReferenceOrder;
|
||||||
import de.anomic.search.SearchEventCache;
|
import de.anomic.search.SearchEventCache;
|
||||||
import de.anomic.search.Segment;
|
import de.anomic.search.Segment;
|
||||||
import de.anomic.search.Switchboard;
|
import de.anomic.search.Switchboard;
|
||||||
|
@ -407,7 +408,7 @@ public class IndexControlRWIs_p {
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_domlength", DigestURI.domLengthEstimation(entry.hash()));
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_ybr", RankingProcess.ybr(entry.hash()));
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_tf", 1000.0 * entry.word().termFrequency());
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getQuery().getOrder() == null) ? -1 : ranked.getQuery().getOrder().authority(entry.hash()));
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_authority", (ranked.getOrder() == null) ? -1 : ranked.getOrder().authority(entry.hash()));
|
||||||
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
|
prop.put("genUrlList_urlList_"+i+"_urlExists_date", DateFormatter.formatShortDay(new Date(entry.word().lastModified())));
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintitle", entry.word().wordsintitle());
|
||||||
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
|
prop.putNum("genUrlList_urlList_"+i+"_urlExists_wordsintext", entry.word().wordsintext());
|
||||||
|
@ -503,7 +504,8 @@ public class IndexControlRWIs_p {
|
||||||
|
|
||||||
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
|
public static RankingProcess genSearchresult(final serverObjects prop, final Switchboard sb, Segment segment, final byte[] keyhash, final Bitfield filter) {
|
||||||
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
|
final QueryParams query = new QueryParams(new String(keyhash), -1, filter, segment, sb.getRanking());
|
||||||
final RankingProcess ranked = new RankingProcess(query, Integer.MAX_VALUE, 1);
|
final ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
|
||||||
|
final RankingProcess ranked = new RankingProcess(query, order, Integer.MAX_VALUE, 1);
|
||||||
ranked.run();
|
ranked.run();
|
||||||
|
|
||||||
if (ranked.filteredCount() == 0) {
|
if (ranked.filteredCount() == 0) {
|
||||||
|
|
|
@ -195,12 +195,13 @@ public class DocumentIndex extends Segment {
|
||||||
final String querystring,
|
final String querystring,
|
||||||
final Segment indexSegment) {
|
final Segment indexSegment) {
|
||||||
QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
|
QueryParams query = new QueryParams(querystring, 100, null, indexSegment, textRankingDefault);
|
||||||
return findMetadata(query);
|
ReferenceOrder order = new ReferenceOrder(query.ranking, query.targetlang);
|
||||||
|
return findMetadata(query, order);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query) {
|
public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {
|
||||||
|
|
||||||
RankingProcess rankedCache = new RankingProcess(query, 1000, 2);
|
RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
|
||||||
rankedCache.run();
|
rankedCache.run();
|
||||||
|
|
||||||
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();
|
ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();
|
||||||
|
|
|
@ -76,7 +76,6 @@ public final class QueryParams {
|
||||||
public boolean onlineSnippetFetch;
|
public boolean onlineSnippetFetch;
|
||||||
public RankingProfile ranking;
|
public RankingProfile ranking;
|
||||||
private Segment indexSegment;
|
private Segment indexSegment;
|
||||||
private final ReferenceOrder order;
|
|
||||||
public String host; // this is the client host that starts the query, not a site operator
|
public String host; // this is the client host that starts the query, not a site operator
|
||||||
public String sitehash; // this is a domain hash, 6 bytes long or null
|
public String sitehash; // this is a domain hash, 6 bytes long or null
|
||||||
public String authorhash;
|
public String authorhash;
|
||||||
|
@ -127,7 +126,6 @@ public final class QueryParams {
|
||||||
this.handle = Long.valueOf(System.currentTimeMillis());
|
this.handle = Long.valueOf(System.currentTimeMillis());
|
||||||
this.specialRights = false;
|
this.specialRights = false;
|
||||||
this.navigators = "all";
|
this.navigators = "all";
|
||||||
this.order = new ReferenceOrder(this.ranking, this.targetlang);
|
|
||||||
this.indexSegment = indexSegment;
|
this.indexSegment = indexSegment;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,14 +175,9 @@ public final class QueryParams {
|
||||||
this.remotepeer = null;
|
this.remotepeer = null;
|
||||||
this.handle = Long.valueOf(System.currentTimeMillis());
|
this.handle = Long.valueOf(System.currentTimeMillis());
|
||||||
this.specialRights = specialRights;
|
this.specialRights = specialRights;
|
||||||
this.order = new ReferenceOrder(this.ranking, this.targetlang);
|
|
||||||
this.indexSegment = indexSegment;
|
this.indexSegment = indexSegment;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ReferenceOrder getOrder() {
|
|
||||||
return this.order;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Segment getSegment() {
|
public Segment getSegment() {
|
||||||
return this.indexSegment;
|
return this.indexSegment;
|
||||||
}
|
}
|
||||||
|
|
|
@ -82,9 +82,9 @@ public final class RankingProcess extends Thread {
|
||||||
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
|
private final ConcurrentHashMap<String, Integer> ref; // reference score computation for the commonSense heuristic
|
||||||
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
|
private final ConcurrentHashMap<String, HostInfo> hostNavigator;
|
||||||
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
|
private final ConcurrentHashMap<String, AuthorInfo> authorNavigator;
|
||||||
|
private final ReferenceOrder order;
|
||||||
|
|
||||||
|
public RankingProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final int concurrency) {
|
||||||
public RankingProcess(final QueryParams query, final int maxentries, final int concurrency) {
|
|
||||||
// we collect the urlhashes and construct a list with urlEntry objects
|
// we collect the urlhashes and construct a list with urlEntry objects
|
||||||
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
||||||
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
||||||
|
@ -93,6 +93,7 @@ public final class RankingProcess extends Thread {
|
||||||
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
|
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
|
||||||
this.handover = new HashSet<String>();
|
this.handover = new HashSet<String>();
|
||||||
this.query = query;
|
this.query = query;
|
||||||
|
this.order = order;
|
||||||
this.maxentries = maxentries;
|
this.maxentries = maxentries;
|
||||||
this.remote_peerCount = 0;
|
this.remote_peerCount = 0;
|
||||||
this.remote_indexCount = 0;
|
this.remote_indexCount = 0;
|
||||||
|
@ -115,6 +116,10 @@ public final class RankingProcess extends Thread {
|
||||||
return this.query;
|
return this.query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ReferenceOrder getOrder() {
|
||||||
|
return this.order;
|
||||||
|
}
|
||||||
|
|
||||||
public void run() {
|
public void run() {
|
||||||
// do a search
|
// do a search
|
||||||
|
|
||||||
|
@ -158,7 +163,7 @@ public final class RankingProcess extends Thread {
|
||||||
long timer = System.currentTimeMillis();
|
long timer = System.currentTimeMillis();
|
||||||
|
|
||||||
// normalize entries
|
// normalize entries
|
||||||
final BlockingQueue<WordReferenceVars> decodedEntries = this.query.getOrder().normalizeWith(index);
|
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index);
|
||||||
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
|
MemoryTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.NORMALIZING, index.size(), System.currentTimeMillis() - timer), false);
|
||||||
|
|
||||||
// iterate over normalized entries and select some that are better than currently stored
|
// iterate over normalized entries and select some that are better than currently stored
|
||||||
|
@ -232,7 +237,7 @@ public final class RankingProcess extends Thread {
|
||||||
for (WordReferenceVars fEntry: filteredEntries) {
|
for (WordReferenceVars fEntry: filteredEntries) {
|
||||||
|
|
||||||
// kick out entries that are too bad according to current findings
|
// kick out entries that are too bad according to current findings
|
||||||
r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
|
r = Long.valueOf(this.order.cardinal(fEntry));
|
||||||
assert maxentries != 0;
|
assert maxentries != 0;
|
||||||
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
|
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
|
||||||
|
|
||||||
|
|
|
@ -173,7 +173,7 @@ public class ResultFetcher {
|
||||||
|
|
||||||
// place the result to the result vector
|
// place the result to the result vector
|
||||||
// apply post-ranking
|
// apply post-ranking
|
||||||
long ranking = Long.valueOf(query.getOrder().cardinal(resultEntry.word()));
|
long ranking = Long.valueOf(rankedCache.getOrder().cardinal(resultEntry.word()));
|
||||||
ranking += postRanking(resultEntry, rankedCache.getTopics());
|
ranking += postRanking(resultEntry, rankedCache.getTopics());
|
||||||
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
|
//System.out.println("*** resultEntry.hash = " + resultEntry.hash());
|
||||||
result.push(resultEntry, ranking);
|
result.push(resultEntry, ranking);
|
||||||
|
|
|
@ -79,6 +79,7 @@ public final class SearchEvent {
|
||||||
private TreeMap<byte[], String> IAResults;
|
private TreeMap<byte[], String> IAResults;
|
||||||
private TreeMap<byte[], Integer> IACount;
|
private TreeMap<byte[], Integer> IACount;
|
||||||
private byte[] IAmaxcounthash, IAneardhthash;
|
private byte[] IAmaxcounthash, IAneardhthash;
|
||||||
|
private final ReferenceOrder order;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
|
@SuppressWarnings("unchecked") SearchEvent(final QueryParams query,
|
||||||
final yacySeedDB peers,
|
final yacySeedDB peers,
|
||||||
|
@ -98,6 +99,7 @@ public final class SearchEvent {
|
||||||
this.IAmaxcounthash = null;
|
this.IAmaxcounthash = null;
|
||||||
this.IAneardhthash = null;
|
this.IAneardhthash = null;
|
||||||
this.localSearchThread = null;
|
this.localSearchThread = null;
|
||||||
|
this.order = new ReferenceOrder(query.ranking, query.targetlang);
|
||||||
|
|
||||||
final long start = System.currentTimeMillis();
|
final long start = System.currentTimeMillis();
|
||||||
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
|
if ((query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ||
|
||||||
|
@ -106,7 +108,7 @@ public final class SearchEvent {
|
||||||
|
|
||||||
// initialize a ranking process that is the target for data
|
// initialize a ranking process that is the target for data
|
||||||
// that is generated concurrently from local and global search threads
|
// that is generated concurrently from local and global search threads
|
||||||
this.rankedCache = new RankingProcess(query, max_results_preparation, fetchpeers + 1);
|
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, fetchpeers + 1);
|
||||||
|
|
||||||
// start a local search concurrently
|
// start a local search concurrently
|
||||||
this.rankedCache.start();
|
this.rankedCache.start();
|
||||||
|
@ -149,7 +151,7 @@ public final class SearchEvent {
|
||||||
this.results = new ResultFetcher(rankedCache, query, peers, 10000);
|
this.results = new ResultFetcher(rankedCache, query, peers, 10000);
|
||||||
} else {
|
} else {
|
||||||
// do a local search
|
// do a local search
|
||||||
this.rankedCache = new RankingProcess(query, max_results_preparation, 2);
|
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);
|
||||||
this.rankedCache.run();
|
this.rankedCache.run();
|
||||||
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
|
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
|
||||||
|
|
||||||
|
@ -191,8 +193,12 @@ public final class SearchEvent {
|
||||||
// store this search to a cache so it can be re-used
|
// store this search to a cache so it can be re-used
|
||||||
if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
|
if (MemoryControl.available() < 1024 * 1024 * 10) SearchEventCache.cleanupEvents(true);
|
||||||
SearchEventCache.put(query.id(false), this);
|
SearchEventCache.put(query.id(false), this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ReferenceOrder getOrder() {
|
||||||
|
return this.order;
|
||||||
|
}
|
||||||
|
|
||||||
public long getEventTime() {
|
public long getEventTime() {
|
||||||
return this.eventTime;
|
return this.eventTime;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user