fixed bug that caused wrong behavior of search result preparation

(a second search on the same topic returned fewer links)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1502 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-01-31 01:20:28 +00:00
parent 31c8476b5d
commit 3834675084
4 changed files with 53 additions and 36 deletions

View File

@ -118,6 +118,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
plasmaSearchResult result = order();
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
flushResults();
// flush results in a separate thread
this.start(); // start to flush results
@ -256,35 +257,18 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
/**
 * Thread body: registers this search event in flushThreads, flushes the
 * search results, and unregisters the event again.
 */
public void run() {
    // keep this search event referenced from somewhere while the flush is in progress
    flushThreads.add(this);
    try {
        flushResults();
    } finally {
        // always unregister, even when flushing fails with an unchecked exception;
        // otherwise this event would remain in flushThreads indefinitely
        flushThreads.remove(this);
    }
}
public void flushResults() {
// put all new results into wordIndex
// this must be called after search results had been computed
// it is wise to call this within a separate thread because this method waits untill all
if (searchThreads == null) return;
// it is wise to call this within a separate thread because
// this method waits until all threads are finished
// wait until all threads are finished
int remaining;
int count = 0;
String wordHash;
int allcount = 0;
long starttime = System.currentTimeMillis();
while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) {
// flush the rcGlobal as much as is there so far
if (rcGlobal.size() > 0) synchronized (rcGlobal) {
Iterator hashi = query.queryHashes.iterator();
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
rcGlobal.setWordHash(wordHash);
wordIndex.addEntries(rcGlobal, true);
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
}
// the rcGlobal was flushed, empty it
count += rcGlobal.size();
rcGlobal.clear();
}
while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) {
allcount += flushResults();
// wait a little bit before trying again
try {Thread.sleep(3000);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - starttime > 90000) {
@ -295,10 +279,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(","));
}
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + allcount + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
// finally delete the temporary index
rcGlobal = null;
flushThreads.remove(this);
}
/**
 * Flushes whatever has been collected in rcGlobal so far into wordIndex
 * and empties rcGlobal afterwards. Intended to be called some time after
 * search results have been computed.
 *
 * @return the size of rcGlobal at the moment it was emptied, or 0 when
 *         rcGlobal is null or holds no entries
 */
public int flushResults() {
    // nothing has been collected (or the temporary index is already gone)
    if ((rcGlobal == null) || (rcGlobal.size() <= 0)) return 0;

    int flushed;
    synchronized (rcGlobal) {
        // hand the collected entries to the word index once per query word hash
        for (Iterator hashes = query.queryHashes.iterator(); hashes.hasNext();) {
            String hash = (String) hashes.next();
            rcGlobal.setWordHash(hash);
            wordIndex.addEntries(rcGlobal, true);
            log.logFine("FLUSHED " + hash + ": " + rcGlobal.size() + " url entries");
        }
        // remember how much was flushed before emptying the container
        flushed = rcGlobal.size();
        rcGlobal.clear();
    }
    return flushed;
}
}

View File

@ -408,16 +408,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
if (container == null) {
container = new plasmaWordIndexEntryContainer(wordHash);
plasmaWordIndexEntryContainer container;
synchronized (cache) {
// get from cache
container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
// get from assortments
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2));
// get from backend
if (maxTime > 0) {
maxTime = maxTime - (System.currentTimeMillis() - start);
if (maxTime < 0) maxTime = 100;
}
container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime));
}
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 1) ? -1 : 8 * maxTime / 10));
if (maxTime > 0) {
maxTime -= System.currentTimeMillis() - start;
if (maxTime < 0) maxTime = 0;
}
container.add(backend.getContainer(wordHash, deleteIfEmpty, maxTime));
return container;
}

View File

@ -323,7 +323,7 @@ public final class plasmaWordIndexDistribution {
Iterator urlIter;
plasmaWordIndexEntry indexEntry;
plasmaCrawlLURL.Entry lurl;
int notBoundCounter = 0;
final HashMap knownURLs = new HashMap();
while (
(count > 0) &&
@ -335,6 +335,7 @@ public final class plasmaWordIndexDistribution {
) {
// make an on-the-fly entity and insert values
indexContainer = this.wordIndex.getContainer(nexthash, true, 10000);
int notBoundCounter = 0;
try {
urlIter = indexContainer.entries();
// iterate over indexes to fetch url entries and store them in the urlCache

View File

@ -114,6 +114,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
public int add(plasmaWordIndexEntryContainer c) {
// returns the number of new elements
if (c == null) return 0;
Iterator i = c.entries();
int x = 0;
while (i.hasNext()) {