mirror of https://github.com/yacy/yacy_search_server.git (synced 2024-09-19 00:01:41 +02:00)
better search computation:

- increased sort limit to 3000 entries (before: 1000); this allows more results to be shown when strongly limiting constraints, such as domain navigation, are in effect
- enhanced the sort process
- checks against domain navigator bugs
- fix in the sort stack
- all navigation pages are now shown at the first search (not only the next page)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6569 6c8d7289-2bf4-0310-a012-ef5d649a1542
parent d126d6c1b5
commit 18172451a0
@@ -591,7 +591,7 @@ public class yacysearch {
             resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation));
             resnav.append("\"><img src=\"env/grafics/navdl.gif\" width=\"16\" height=\"16\"></a> ");
         }
-        final int numberofpages = Math.min(10, Math.min(thispage + 2, totalcount / theQuery.displayResults()));
+        final int numberofpages = Math.min(10, Math.max(thispage + 1, totalcount / theQuery.displayResults()));
         for (int i = 0; i < numberofpages; i++) {
             if (i == thispage) {
                 resnav.append("<img src=\"env/grafics/navs");
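For illustration (not part of the commit): with an assumed totalcount of 100, displayResults() of 10 and thispage of 0, the old formula yields min(10, min(2, 10)) = 2 navigation links on the first page, while the new one yields min(10, max(1, 10)) = 10, which is the behavior the commit message describes as "all navigation pages are now shown at the first search". A standalone sketch:

    // Minimal sketch comparing the two formulas; totalcount, displayResults
    // and thispage are assumed stand-ins for the values in yacysearch.java.
    public class NavPagesDemo {
        public static void main(String[] args) {
            final int totalcount = 100;     // assumed number of results
            final int displayResults = 10;  // assumed results per page
            final int thispage = 0;         // first result page

            // old: capped at the current page + 1, so page 1 offered only 2 links
            int oldPages = Math.min(10, Math.min(thispage + 2, totalcount / displayResults));
            // new: at least all pages the result count allows, up to 10
            int newPages = Math.min(10, Math.max(thispage + 1, totalcount / displayResults));

            System.out.println("old = " + oldPages + ", new = " + newPages); // old = 2, new = 10
        }
    }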
@@ -201,7 +201,7 @@ public class DocumentIndex extends Segment {
 
     public static final ArrayList<URIMetadataRow> findMetadata(final QueryParams query, final ReferenceOrder order) {
 
-        RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2);
+        RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 2);
        rankedCache.run();
 
        ArrayList<URIMetadataRow> result = new ArrayList<URIMetadataRow>();
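Note: this replaces the magic number 1000 with a reference to SearchEvent.max_results_preparation, which a later hunk in this commit makes public and raises to 3000, so DocumentIndex and the search event pipeline now share a single sort-limit definition.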
@@ -67,7 +67,7 @@ public final class RankingProcess extends Thread {
 
     private final QueryParams query;
     private final int maxentries;
-    private final ConcurrentHashMap<String, Integer> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
+    private final ConcurrentHashMap<String, Long> urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
     private final int[] flagcount; // flag counter
     private final TreeSet<String> misses; // contains url-hashes that could not been found in the LURL-DB
     //private final int[] domZones;
@@ -99,7 +99,7 @@ public final class RankingProcess extends Thread {
         this.remote_indexCount = 0;
         this.remote_resourceSize = 0;
         this.local_resourceSize = 0;
-        this.urlhashes = new ConcurrentHashMap<String, Integer>(0, 0.75f, concurrency);
+        this.urlhashes = new ConcurrentHashMap<String, Long>(0, 0.75f, concurrency);
         this.misses = new TreeSet<String>();
         this.flagcount = new int[32];
         for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
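The value type of the double-check map changes from Integer to Long so it can hold the Long ranking cardinal computed by order.cardinal(...), which the rewritten loop below stores via urlhashes.put(iEntry.metadataHash(), r) as a backreference for deletion. A minimal standalone sketch of that pattern, with hypothetical names:

    import java.util.concurrent.ConcurrentHashMap;

    // Sketch only; class and method names are stand-ins, not YaCy classes.
    public class DoubleCheck {
        private final ConcurrentHashMap<String, Long> urlhashes =
                new ConcurrentHashMap<String, Long>(0, 0.75f, 8);

        /** record a url-hash with its ranking cardinal; false if seen before */
        public boolean record(final String urlhash, final long cardinal) {
            if (urlhashes.containsKey(urlhash)) return false; // double-check, as in the patch
            urlhashes.put(urlhash, Long.valueOf(cardinal));   // cardinal = backreference for deletion
            return true;
        }

        /** the stored cardinal can later locate the entry in the sort stack */
        public Long positionOf(final String urlhash) {
            return urlhashes.get(urlhash);
        }
    }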
@@ -172,7 +172,8 @@ public final class RankingProcess extends Thread {
         String domhash;
         boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
         WordReferenceVars iEntry;
-        final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();
+        Long r;
+        //final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();
 
         // apply all constraints
         try {
@@ -225,39 +226,39 @@ public final class RankingProcess extends Thread {
                 }
                 
                 // accept
-                filteredEntries.add(iEntry);
+                //filteredEntries.add(iEntry);
                 
                 // increase counter for statistics
-                if (!local) this.remote_indexCount++;
+                if (!local) this.remote_indexCount++;/*
             }
         } catch (InterruptedException e) {}
         
-        // do the ranking
-        Long r;
-        for (WordReferenceVars fEntry: filteredEntries) {
-            
-            // kick out entries that are too bad according to current findings
-            r = Long.valueOf(this.order.cardinal(fEntry));
-            assert maxentries != 0;
-            if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
-            
-            // insert
-            if ((maxentries < 0) || (stack.size() < maxentries)) {
-                // in case that we don't have enough yet, accept any new entry
-                if (urlhashes.containsKey(fEntry.metadataHash())) continue;
-                stack.push(fEntry, r);
-            } else {
-                // if we already have enough entries, insert only such that are necessary to get a better result
-                if (stack.bottom(r.longValue())) {
-                    continue;
-                }
-                // double-check
-                if (urlhashes.containsKey(fEntry.metadataHash())) continue;
-                stack.push(fEntry, r);
-            }
-            
-        }
+        // do the ranking
+        for (WordReferenceVars fEntry: filteredEntries) {
+        */
+                // kick out entries that are too bad according to current findings
+                r = Long.valueOf(this.order.cardinal(iEntry));
+                assert maxentries != 0;
+                
+                // double-check
+                if (urlhashes.containsKey(iEntry.metadataHash())) continue;
+                
+                // insert
+                if (maxentries < 0 || stack.size() < maxentries) {
+                    // in case that we don't have enough yet, accept any new entry
+                    stack.push(iEntry, r);
+                } else {
+                    // if we already have enough entries, insert only such that are necessary to get a better result
+                    if (stack.bottom(r.longValue())) continue;
+                    
+                    // take the entry. the stack is automatically reduced
+                    // to the maximum size by deletion of elements at the bottom
+                    stack.push(iEntry, r);
+                }
+                urlhashes.put(iEntry.metadataHash(), r);
+            }
+            
+        } catch (InterruptedException e) {}
+        
+        //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
         EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime);
     }
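A hedged sketch of the bounded-insert policy the rewritten loop implements: accept anything while below capacity, otherwise accept only entries that beat the current bottom. TreeMap stands in for YaCy's SortStack, and smaller cardinals are assumed here to rank better (the largest key is the "bottom"):

    import java.util.TreeMap;

    // Sketch only; BoundedStack is a stand-in, not the real SortStack class.
    public class BoundedStack {
        private final TreeMap<Long, String> stack = new TreeMap<Long, String>();
        private final int maxentries;

        public BoundedStack(final int maxentries) { this.maxentries = maxentries; }

        public void push(final String urlhash, final long cardinal) {
            if (maxentries < 0 || stack.size() < maxentries) {
                // in case that we don't have enough yet, accept any new entry
                stack.put(Long.valueOf(cardinal), urlhash);
                return;
            }
            // already full: only entries that beat the current bottom are useful
            if (cardinal >= stack.lastKey().longValue()) return; // would be bottom
            stack.put(Long.valueOf(cardinal), urlhash);
            // reduce the stack back to its maximum size from the bottom
            while (stack.size() > maxentries) stack.remove(stack.lastKey());
        }
    }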
@@ -574,7 +575,7 @@ public final class RankingProcess extends Thread {
         URIMetadataRow mr;
         DigestURI url;
         String hostname;
-        for (int i = 0; i < rc; i++) {
+        loop: for (int i = 0; i < rc; i++) {
             mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0);
             if (mr == null) continue;
             url = mr.metadata().url();
@@ -582,6 +583,7 @@ public final class RankingProcess extends Thread {
             hostname = url.getHost();
             if (hostname == null) continue;
             if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue;
+            for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists
             result.add(new NavigatorEntry(hostname, hsa[i].count));
         }
         return result;
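The added line relies on Java's labeled continue: when a hostname is already present in result, control jumps to the next iteration of the outer loop labeled `loop` (the label added in the previous hunk), skipping the result.add(...) call. A standalone sketch with stand-in data:

    import java.util.ArrayList;
    import java.util.List;

    // Demo of the labeled-continue de-duplication pattern; data is made up.
    public class LabeledContinueDemo {
        public static void main(String[] args) {
            String[] hosts = {"a.net", "b.org", "a.net", "c.com"};
            List<String> result = new ArrayList<String>();
            loop: for (String hostname : hosts) {
                for (String seen : result) {
                    // without the label, continue would only skip inner iterations
                    if (seen.equals(hostname)) continue loop;
                }
                result.add(hostname);
            }
            System.out.println(result); // [a.net, b.org, c.com]
        }
    }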
@@ -59,7 +59,7 @@ public final class SearchEvent {
     public static final String NORMALIZING = "normalizing";
     public static final String FINALIZATION = "finalization";
     
-    private static final int max_results_preparation = 1000;
+    public static final int max_results_preparation = 3000;
     
     // class variables that may be implemented with an abstract class
     private long eventTime;
@@ -177,10 +177,18 @@ public class SortStack<E> {
         // returns true if the element with that weight would be on the bottom of the stack after inserting
         if (this.onstack.isEmpty()) return true;
         Long l;
-        synchronized (this.onstack) {
-            l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey();
-        }
-        return weight > l.longValue();
+        if (this.upward) {
+            synchronized (this.onstack) {
+                l = this.onstack.lastKey();
+            }
+            return weight > l.longValue();
+        } else {
+            synchronized (this.onstack) {
+                l = this.onstack.firstKey();
+            }
+            return weight < l.longValue();
+        }
     }
     
     public class stackElement {
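For illustration (assumed harness, not YaCy code): on a downward-sorted stack the bottom holds the smallest key, so "would be bottom" must compare with < against the smallest key, not with > against the largest as the old code did for both directions. This is the "fix in the sort stack" from the commit message:

    import java.util.TreeMap;

    // Sketch only; the TreeMap and values are stand-ins for SortStack state.
    public class BottomCheckDemo {
        public static void main(String[] args) {
            TreeMap<Long, String> onstack = new TreeMap<Long, String>();
            onstack.put(10L, "a"); onstack.put(20L, "b"); onstack.put(30L, "c");

            long weight = 5L;
            boolean upward = false; // downward stack: 30 on top, 10 at the bottom

            boolean bottom = upward
                    ? weight > onstack.lastKey().longValue()   // upward: bottom holds the largest key
                    : weight < onstack.firstKey().longValue(); // downward: bottom holds the smallest key

            System.out.println(bottom); // true: 5 would sink below 10
            // the old code always compared with '>', wrongly reporting false here
        }
    }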