fixed bug that caused wrong behavior of search result preparation

(a second search on the same topic resulted in fewer links) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1502 6c8d7289-2bf4-0310-a012-ef5d649a1542
2024-09-19 00:01:41 +02:00 · 2006-01-31 01:20:28 +00:00 · 2006-01-31 01:20:28 +00:00 · 3834675084
commit 3834675084
parent 31c8476b5d
4 changed files with 53 additions and 36 deletions
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@ -118,6 +118,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
            plasmaSearchResult result = order();
            result.globalContributions = globalContributions;
            result.localContributions = rcLocal.size();
+            flushResults();
            
            // flush results in a separate thread
            this.start(); // start to flush results
@ -256,35 +257,18 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
    
    public void run() {
        flushThreads.add(this); // this will care that the search event object is referenced from somewhere while it is still alive
-        flushResults();
-        flushThreads.remove(this);
-    }
-    
-    public void flushResults() {
+
        // put all new results into wordIndex
        // this must be called after search results had been computed
-        // it is wise to call this within a separate thread because this method waits untill all
-        if (searchThreads == null) return;
+        // it is wise to call this within a separate thread because
+        // this method waits until all threads are finished

-        // wait until all threads are finished
        int remaining;
-        int count = 0;
-        String wordHash;
+        int allcount = 0;
        long starttime = System.currentTimeMillis();
-        while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) {
-            // flush the rcGlobal as much as is there so far
-            if (rcGlobal.size() > 0) synchronized (rcGlobal) {
-                Iterator hashi = query.queryHashes.iterator();
-                while (hashi.hasNext()) {
-                    wordHash = (String) hashi.next();
-                    rcGlobal.setWordHash(wordHash);
-                    wordIndex.addEntries(rcGlobal, true);
-                    log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
-                }
-                // the rcGlobal was flushed, empty it
-                count += rcGlobal.size();
-                rcGlobal.clear();
-            }    
+        while ((searchThreads != null) && ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0)) {
+            allcount += flushResults();
+  
            // wait a little bit before trying again
            try {Thread.sleep(3000);} catch (InterruptedException e) {}
            if (System.currentTimeMillis() - starttime > 90000) {
@ -295,10 +279,34 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
            log.logFine("FINISHED FLUSH RESULTS PROCESS for query " + query.hashes(","));
        }
        
-        serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
-	        
+        serverLog.logFine("PLASMA", "FINISHED FLUSHING " + allcount + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
+            
        // finally delete the temporary index
        rcGlobal = null;
+        
+        flushThreads.remove(this);
+    }
+    
+    public int flushResults() {
+        // flush the rcGlobal as much as is there so far
+        // this must be called sometime after search results had been computed
+        int count = 0;
+        if ((rcGlobal != null) && (rcGlobal.size() > 0)) {
+            synchronized (rcGlobal) {
+                String wordHash;
+                Iterator hashi = query.queryHashes.iterator();
+                while (hashi.hasNext()) {
+                    wordHash = (String) hashi.next();
+                    rcGlobal.setWordHash(wordHash);
+                    wordIndex.addEntries(rcGlobal, true);
+                    log.logFine("FLUSHED " + wordHash + ": " + rcGlobal.size() + " url entries");
+                }
+                // the rcGlobal was flushed, empty it
+                count += rcGlobal.size();
+                rcGlobal.clear();
+            }
+        }
+        return count;
    }
    
 }
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@ -408,16 +408,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {

    public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
        long start = System.currentTimeMillis();
-        plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
-        if (container == null) {
-            container = new plasmaWordIndexEntryContainer(wordHash);
+        
+        plasmaWordIndexEntryContainer container;
+        synchronized (cache) {
+            // get from cache
+            container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+            if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
+
+            // get from assortments
+            container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2));
+
+            // get from backend
+            if (maxTime > 0) {
+                maxTime = maxTime - (System.currentTimeMillis() - start);
+                if (maxTime < 0) maxTime = 100;
+            }
+            container.add(backend.getContainer(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime));
        }
-        container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 1) ? -1 : 8 * maxTime / 10));
-        if (maxTime > 0) {
-            maxTime -= System.currentTimeMillis() - start;
-            if (maxTime < 0) maxTime = 0;
-        }
-        container.add(backend.getContainer(wordHash, deleteIfEmpty, maxTime));
        return container;
    }

--- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java
+++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
@ -323,7 +323,7 @@ public final class plasmaWordIndexDistribution {
            Iterator urlIter;
            plasmaWordIndexEntry indexEntry;
            plasmaCrawlLURL.Entry lurl;
-            int notBoundCounter = 0;
+            
            final HashMap knownURLs = new HashMap();
            while (
                    (count > 0) &&
@ -335,6 +335,7 @@ public final class plasmaWordIndexDistribution {
            ) {
                // make an on-the-fly entity and insert values
                    indexContainer = this.wordIndex.getContainer(nexthash, true, 10000);
+                    int notBoundCounter = 0;
                    try {
                        urlIter = indexContainer.entries();
                        // iterate over indexes to fetch url entries and store them in the urlCache
--- a/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
+++ b/source/de/anomic/plasma/plasmaWordIndexEntryContainer.java
@ -114,6 +114,7 @@ public final class plasmaWordIndexEntryContainer implements Comparable {
    
    public int add(plasmaWordIndexEntryContainer c) {
        // returns the number of new elements
+        if (c == null) return 0;
        Iterator i = c.entries();
        int x = 0;
        while (i.hasNext()) {