From 18172451a007b313ab7bb0fcbfe25da8fd6ce09e Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 12 Jan 2010 15:01:44 +0000 Subject: [PATCH] better search computation: - increased sort limit, now 3000 entries, before: 1000 this should allow more results to be shown in case of strongly limiting constraints, like domain navigation - enhanced the sort process - check against domain navigator bugs - fix in sort stack - now showing all navigation pages at first search (not only next page) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6569 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearch.java | 2 +- source/de/anomic/search/DocumentIndex.java | 2 +- source/de/anomic/search/RankingProcess.java | 66 ++++++++++---------- source/de/anomic/search/SearchEvent.java | 2 +- source/net/yacy/kelondro/util/SortStack.java | 14 ++++- 5 files changed, 48 insertions(+), 38 deletions(-) diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 7593db55e..2914aa209 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -591,7 +591,7 @@ public class yacysearch { resnav.append(QueryParams.navurl("html", thispage - 1, display, theQuery, originalUrlMask, null, navigation)); resnav.append("\"> "); } - final int numberofpages = Math.min(10, Math.min(thispage + 2, totalcount / theQuery.displayResults())); + final int numberofpages = Math.min(10, Math.max(thispage + 1, totalcount / theQuery.displayResults())); for (int i = 0; i < numberofpages; i++) { if (i == thispage) { resnav.append(" findMetadata(final QueryParams query, final ReferenceOrder order) { - RankingProcess rankedCache = new RankingProcess(query, order, 1000, 2); + RankingProcess rankedCache = new RankingProcess(query, order, SearchEvent.max_results_preparation, 2); rankedCache.run(); ArrayList result = new ArrayList(); diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index a8339a43b..a314dcb9a 100644 --- 
a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -67,7 +67,7 @@ public final class RankingProcess extends Thread { private final QueryParams query; private final int maxentries; - private final ConcurrentHashMap urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) + private final ConcurrentHashMap urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final int[] flagcount; // flag counter private final TreeSet misses; // contains url-hashes that could not been found in the LURL-DB //private final int[] domZones; @@ -99,7 +99,7 @@ public final class RankingProcess extends Thread { this.remote_indexCount = 0; this.remote_resourceSize = 0; this.local_resourceSize = 0; - this.urlhashes = new ConcurrentHashMap(0, 0.75f, concurrency); + this.urlhashes = new ConcurrentHashMap(0, 0.75f, concurrency); this.misses = new TreeSet(); this.flagcount = new int[32]; for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;} @@ -172,7 +172,8 @@ public final class RankingProcess extends Thread { String domhash; boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0; WordReferenceVars iEntry; - final ArrayList filteredEntries = new ArrayList(); + Long r; + //final ArrayList filteredEntries = new ArrayList(); // apply all constraints try { @@ -225,39 +226,39 @@ public final class RankingProcess extends Thread { } // accept - filteredEntries.add(iEntry); + //filteredEntries.add(iEntry); // increase counter for statistics - if (!local) this.remote_indexCount++; + if (!local) this.remote_indexCount++;/* } - } catch (InterruptedException e) {} - // do the ranking - Long r; - for (WordReferenceVars fEntry: filteredEntries) { - - // kick out entries that are too bad according to current findings - r = Long.valueOf(this.order.cardinal(fEntry)); - assert maxentries != 0; - if 
(maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue; - - // insert - if ((maxentries < 0) || (stack.size() < maxentries)) { - // in case that we don't have enough yet, accept any new entry - if (urlhashes.containsKey(fEntry.metadataHash())) continue; - stack.push(fEntry, r); - } else { - // if we already have enough entries, insert only such that are necessary to get a better result - if (stack.bottom(r.longValue())) { - continue; - } - // double-check - if (urlhashes.containsKey(fEntry.metadataHash())) continue; - stack.push(fEntry, r); - } - - } + // do the ranking + for (WordReferenceVars fEntry: filteredEntries) { + */ + // kick out entries that are too bad according to current findings + r = Long.valueOf(this.order.cardinal(iEntry)); + assert maxentries != 0; + + // double-check + if (urlhashes.containsKey(iEntry.metadataHash())) continue; + + // insert + if (maxentries < 0 || stack.size() < maxentries) { + // in case that we don't have enough yet, accept any new entry + stack.push(iEntry, r); + } else { + // if we already have enough entries, insert only such that are necessary to get a better result + if (stack.bottom(r.longValue())) continue; + + // take the entry. 
the stack is automatically reduced + // to the maximum size by deletion of elements at the bottom + stack.push(iEntry, r); + } + urlhashes.put(iEntry.metadataHash(), r); + } + } catch (InterruptedException e) {} + //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); EventTracker.update("SEARCH", new ProfilingGraph.searchEvent(query.id(true), SearchEvent.PRESORT, index.size(), System.currentTimeMillis() - timer), false, 30000, ProfilingGraph.maxTime); } @@ -574,7 +575,7 @@ public final class RankingProcess extends Thread { URIMetadataRow mr; DigestURI url; String hostname; - for (int i = 0; i < rc; i++) { + loop: for (int i = 0; i < rc; i++) { mr = this.query.getSegment().urlMetadata().load(hsa[i].hashsample, null, 0); if (mr == null) continue; url = mr.metadata().url(); @@ -582,6 +583,7 @@ public final class RankingProcess extends Thread { hostname = url.getHost(); if (hostname == null) continue; if (query.tenant != null && !hostname.contains(query.tenant) && !url.toNormalform(true, true).contains(query.tenant)) continue; + for (NavigatorEntry entry: result) if (entry.name.equals(hostname)) continue loop; // check if one entry already exists result.add(new NavigatorEntry(hostname, hsa[i].count)); } return result; diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 3f5a67d74..17776226f 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -59,7 +59,7 @@ public final class SearchEvent { public static final String NORMALIZING = "normalizing"; public static final String FINALIZATION = "finalization"; - private static final int max_results_preparation = 1000; + public static final int max_results_preparation = 3000; // class variables that may be implemented with an abstract class private long eventTime; diff --git a/source/net/yacy/kelondro/util/SortStack.java b/source/net/yacy/kelondro/util/SortStack.java index 
84b34944f..ad54ab729 100644 --- a/source/net/yacy/kelondro/util/SortStack.java +++ b/source/net/yacy/kelondro/util/SortStack.java @@ -177,10 +177,18 @@ public class SortStack { // returns true if the element with that weight would be on the bottom of the stack after inserting if (this.onstack.isEmpty()) return true; Long l; - synchronized (this.onstack) { - l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey(); + + if (this.upward) { + synchronized (this.onstack) { + l = this.onstack.lastKey(); + } + return weight > l.longValue(); + } else { + synchronized (this.onstack) { + l = this.onstack.firstKey(); + } + return weight < l.longValue(); } - return weight > l.longValue(); } public class stackElement {