mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Fixed a bug that caused wrong behavior of search result preparation
(a second search on the same topic resulted in fewer links). git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1502 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
31c8476b5d
commit
3834675084
|
@ -118,6 +118,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
plasmaSearchResult result = order();
|
||||
result.globalContributions = globalContributions;
|
||||
result.localContributions = rcLocal.size();
|
||||
flushResults();
|
||||
|
||||
// flush results in a separate thread
|
||||
this.start(); // start to flush results
|
||||
|
@ -256,35 +257,18 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
|
||||
public void run() {
|
||||
flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive
|
||||
flushResults();
|
||||
flushThreads.remove(this);
|
||||
}
|
||||
|
||||
public void flushResults() {
|
||||
|
||||
// put all new results into wordIndex
|
||||
// this must be called after search results had been computed
|
||||
// it is wise to call this within a separate thread because this method waits until all
|
||||
if (searchThreads == null) return;
|
||||
// it is wise to call this within a separate thread because
|
||||
// this method waits until all threads are finished
|
||||
|
||||
// wait until all threads are finished
|
||||
int remaining;
|
||||
int count = 0;
|
||||
String wordHash;
|
||||
int allcount = 0;
|
||||
long starttime = System.currentTimeMillis();
|
||||
while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) {
|
||||
// flush the rcGlobal as much as is there so far
|
||||
if (rcGlobal.size() > 0) synchronized (rcGlobal) {
|
||||
Iterator hashi = query.queryHashes.iterator();
|
||||
while (hashi.hasNext()) {
|
||||
wordHash = (String) hashi.next();
|
||||
rcGlobal.setWordHash(wordHash);
|
||||
wordIndex.addEntries(rcGlobal, true);
|
||||
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
|
||||
}
|
||||
// the rcGlobal was flushed, empty it
|
||||
count += rcGlobal.size();
|
||||
rcGlobal.clear();
|
||||
}
|
||||
while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) {
|
||||
allcount += flushResults();
|
||||
|
||||
// wait a little bit before trying again
|
||||
try {Thread.sleep(3000);} catch (InterruptedException e) {}
|
||||
if (System.currentTimeMillis() - starttime > 90000) {
|
||||
|
@ -295,10 +279,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
|
|||
log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(","));
|
||||
}
|
||||
|
||||
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
|
||||
|
||||
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + allcount + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
|
||||
|
||||
// finally delete the temporary index
|
||||
rcGlobal = null;
|
||||
|
||||
flushThreads.remove(this);
|
||||
}
|
||||
|
||||
public int flushResults() {
|
||||
// flush the rcGlobal as much as is there so far
|
||||
// this must be called sometime after search results had been computed
|
||||
int count = 0;
|
||||
if ((rcGlobal != null) && (rcGlobal.size() > 0)) {
|
||||
synchronized (rcGlobal) {
|
||||
String wordHash;
|
||||
Iterator hashi = query.queryHashes.iterator();
|
||||
while (hashi.hasNext()) {
|
||||
wordHash = (String) hashi.next();
|
||||
rcGlobal.setWordHash(wordHash);
|
||||
wordIndex.addEntries(rcGlobal, true);
|
||||
log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
|
||||
}
|
||||
// the rcGlobal was flushed, empty it
|
||||
count += rcGlobal.size();
|
||||
rcGlobal.clear();
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -408,16 +408,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
|
|||
|
||||
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
|
||||
long start = System.currentTimeMillis();
|
||||
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
|
||||
if (container == null) {
|
||||
container = new plasmaWordIndexEntryContainer(wordHash);
|
||||
|
||||
plasmaWordIndexEntryContainer container;
|
||||
synchronized (cache) {
|
||||
// get from cache
|
||||
container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
|
||||
if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
|
||||
|
||||
// get from assortments
|
||||
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2));
|
||||
|
||||
// get from backend
|
||||
if (maxTime > 0) {
|
||||
maxTime = maxTime - (System.currentTimeMillis() - start);
|
||||
if (maxTime < 0) maxTime = 100;
|
||||
}
|
||||
container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime));
|
||||
}
|
||||
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 1) ? -1 : 8 * maxTime / 10));
|
||||
if (maxTime > 0) {
|
||||
maxTime -= System.currentTimeMillis() - start;
|
||||
if (maxTime < 0) maxTime = 0;
|
||||
}
|
||||
container.add(backend.getContainer(wordHash, deleteIfEmpty, maxTime));
|
||||
return container;
|
||||
}
|
||||
|
||||
|
|
|
@ -323,7 +323,7 @@ public final class plasmaWordIndexDistribution {
|
|||
Iterator urlIter;
|
||||
plasmaWordIndexEntry indexEntry;
|
||||
plasmaCrawlLURL.Entry lurl;
|
||||
int notBoundCounter = 0;
|
||||
|
||||
final HashMap knownURLs = new HashMap();
|
||||
while (
|
||||
(count > 0) &&
|
||||
|
@ -335,6 +335,7 @@ public final class plasmaWordIndexDistribution {
|
|||
) {
|
||||
// make an on-the-fly entity and insert values
|
||||
indexContainer = this.wordIndex.getContainer(nexthash, true, 10000);
|
||||
int notBoundCounter = 0;
|
||||
try {
|
||||
urlIter = indexContainer.entries();
|
||||
// iterate over indexes to fetch url entries and store them in the urlCache
|
||||
|
|
|
@ -114,6 +114,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
|
|||
|
||||
public int add(plasmaWordIndexEntryContainer c) {
|
||||
// returns the number of new elements
|
||||
if (c == null) return 0;
|
||||
Iterator i = c.entries();
|
||||
int x = 0;
|
||||
while (i.hasNext()) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user