Added a button to manually refresh sorting of p2p search results.

As a server-side oriented alternative to the JavaScript realtime
resorting feature proposed in PR #104.
The goal is the same as in that PR: having the possibility to compensate
for the network latency of the various peers' result fetching, and to obtain,
as soon as possible, a consistently ranked result set.
This commit is contained in:
luccioman 2017-08-28 19:03:51 +02:00
parent 31c99b2a8a
commit a1a0515312
5 changed files with 199 additions and 58 deletions

View File

@ -110,6 +110,17 @@ function statistics(offset, itemscount, itemsperpage, totalcount, localResourceS
}
}
/* Display the eventual button allowing to refresh the sort of cached results
* only when all feeds are terminated and when there is more than one result */
var resortCachedElement = document.getElementById("resortCached");
if(resortCachedElement != null) {
if(feedRunning) {
resortCachedElement.style.visibility = "hidden";
} else if(totalcountIntValue > 1){
resortCachedElement.style.visibility = "visible";
}
}
if (totalcountIntValue == 0) {
return;
}

View File

@ -80,6 +80,12 @@ Use the RSS search result format to add static searches to your RSS reader, if y
placeholder="#[promoteSearchPageGreeting]#" value="#[former]#" #(focus)#::autofocus="autofocus"#(/focus)# onFocus="this.select()" onclick="document.getElementById('Enter').innerHTML = 'search'"/>
<div class="input-group-btn">
<button id="Enter" name="Enter" class="btn btn-default" type="submit">search</button>
#(resortEnabled)#::
<a id="resortCached" class="btn btn-default" style="visibility: hidden;" role="button" href="#[url]#"
title="Refresh sorting. Depending on their rank, some results fetched in background may then appear on this page.">
<span class="glyphicon glyphicon-sort"></span>
</a>
#(/resortEnabled)#
</div>
</div>
<input type="hidden" name="contentdom" id="contentdom" value="#[contentdom]#" />
@ -106,7 +112,9 @@ Use the RSS search result format to add static searches to your RSS reader, if y
<div id="results"></div>
<div class="progress">
<div class="progress-bar progress-bar-info" id="progressbar" role="progressbar" aria-valuemin="0" aria-valuemax="100" style="width:0%;">
<span style="position:absolute;display:block;text-align:left;width:100%;color:black;">&nbsp;&nbsp;&nbsp;<strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)# <span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span></span>
<span style="position:absolute;display:block;text-align:left;width:85%;color:black;">&nbsp;&nbsp;&nbsp;<strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)#
<span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span>
</span>
</div>
</div>
::

View File

@ -684,7 +684,8 @@ public class yacysearch {
final long timestamp = System.currentTimeMillis();
// create a new search event
if ( SearchEventCache.getEvent(theQuery.id(false)) == null ) {
final SearchEvent cachedEvent = SearchEventCache.getEvent(theQuery.id(false));
if (cachedEvent == null) {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
startRecord = 0;
}
@ -702,6 +703,10 @@ public class yacysearch {
sb.getConfigLong(
SwitchboardConstants.REMOTESEARCH_MAXTIME_USER,
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)));
if(post.getBoolean("resortCachedResults") && cachedEvent == theSearch) {
theSearch.resortCachedResults();
}
if ( startRecord == 0 && authenticated && !stealthmode ) {
if ( modifier.sitehost != null && sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ) {
@ -822,6 +827,13 @@ public class yacysearch {
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
/* In p2p mode only, add a link allowing user to resort already drained results,
* eventually including fetched results with higher ranks from the Solr and RWI stacks */
prop.put("resortEnabled", global && !stealthmode && theSearch.resortCacheAllowed.availablePermits() > 0 ? 1 : 0);
prop.put("resortEnabled_url",
QueryParams.navurlBase(RequestHeader.FileType.HTML, theQuery, null, true).append("&startRecord=")
.append(startRecord).append("&resortCachedResults=true").toString());
// generate the search result lines; the content will be produced by another servlet
for ( int i = 0; i < theQuery.itemsPerPage(); i++ ) {

View File

@ -115,8 +115,6 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* elements that had been on the stack cannot be put in again,
* they are checked against the drained list
* @param element the element (must have a equals() method)
* @param weight the weight of the element
* @param remove - the rating of the element that shall be removed in case that the stack has an size overflow
*/
public synchronized void put(final Element<E> element) {
// put the element on the stack
@ -170,6 +168,19 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return element;
}
/**
 * Puts back onto the internal queue every element currently stored in the
 * drained backup list. Does nothing when no drained list is maintained.
 */
public synchronized void requeueDrainedElements() {
    if (this.drained == null) {
        return;
    }
    /* Capture the count up front: elements are removed from the tail of the
     * drained list while iterating, so its size shrinks on each step. */
    int toRequeue = this.drained.size();
    while (toRequeue-- > 0) {
        put(this.drained.remove(this.drained.size() - 1));
    }
}
/**
* remove a drained element
* @param element
@ -189,13 +200,24 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
*/
/**
 * Returns, without removing it, the element of the internal queue having
 * the smallest weight.
 * @return the head of the queue, or null when the queue is empty
 */
public synchronized Element<E> peek() {
    return this.queue.isEmpty() ? null : this.queue.first();
}
/**
 * Returns, without removing it, the element of the internal queue having
 * the highest weight.
 * @return the tail of the queue, or null when the queue is empty
 */
public synchronized Element<E> getLastInQueue() {
    return this.queue.isEmpty() ? null : this.queue.last();
}
/**
* all objects that have been returned by poll or take are stored in a back-up list

View File

@ -42,10 +42,13 @@ import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
@ -89,9 +92,9 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.RemoteSearch;
import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -104,8 +107,6 @@ import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.search.snippet.TextSnippet.ResultClass;
import org.apache.solr.common.SolrDocument;
public final class SearchEvent {
private static final int max_results_rwi = 3000;
@ -254,6 +255,9 @@ public final class SearchEvent {
/** the number of peers which contributed to the remote search result */
public final AtomicInteger remote_solr_peerCount;
/** Ensure only one {@link #resortCachedResults()} operation to be performed on this search event */
public final Semaphore resortCacheAllowed;
public int getResultCount() {
return Math.max(
this.local_rwi_available.get() + this.remote_rwi_available.get() +
@ -355,6 +359,7 @@ public final class SearchEvent {
this.remote_solr_stored = new AtomicInteger(0);
this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers
this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed
this.resortCacheAllowed = new Semaphore(1);
final long start = System.currentTimeMillis();
// do a soft commit for fresh results
@ -1412,58 +1417,25 @@ public final class SearchEvent {
/**
* Adds the retrieved results (fulltext & rwi) to the result list and
* computes the text snippets
* @param concurrentSnippetFetch when true, allow starting concurrent tasks to fetch snippets when no one are already available
* @return true on adding entries to resultlist otherwise false
*/
public boolean drainStacksToResult() {
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
// we take one entry from both stacks at the same time
boolean success = false;
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
CacheStrategy.CACHEONLY,
false,
180,
false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
if (SearchEvent.this.snippetFetchAlive.get() >= 10) {
// too many concurrent processes
addResult(getSnippet(node, null), localEntryElement.getWeight());
success = true;
} else {
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
return solrSuccess || rwiSuccess;
}
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
@Override
public void run() {
SearchEvent.this.oneFeederStarted();
try {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
} catch (final Throwable e) {} finally {
SearchEvent.this.snippetFetchAlive.decrementAndGet();
}
} catch (final Throwable e) {} finally {
SearchEvent.this.oneFeederTerminated();
}
}
}.start();
}
}
}
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) {
/**
* Adds the retrieved results from local and remotes RWI to the result list and
* computes the text snippets
* @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
* @return true when an entry has been effectively added to resultlist otherwise false
*/
private boolean drainRWIStackToResult(boolean concurrentSnippetFetch) {
boolean success = false;
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus() || !concurrentSnippetFetch) {
// too many concurrent processes
final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
if (noderwi != null) {
@ -1495,7 +1467,64 @@ public final class SearchEvent {
if (SearchEvent.this.query.snippetCacheStrategy == null) t.run(); else t.start(); //no need for concurrency if there is no latency
}
return success;
}
}
/**
 * Adds the retrieved full text results from local and remote Solr instances to the
 * result list and computes the text snippets.
 * Note on the return value: on the concurrent snippet-fetch path the result is added
 * asynchronously by the spawned thread, so this method returns false in that case.
 * @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
 * @return true when an entry has been synchronously added to the result list, otherwise false
 */
private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
boolean success = false;
// pull at most one element from the Solr node stack (null when the stack is empty)
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
// a snippet was delivered with the Solr response: pick the better of Solr vs. local cache snippet
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
// local snippet computed from cache only (CacheStrategy.CACHEONLY), no network reload
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
CacheStrategy.CACHEONLY,
false,
180,
false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
// keep the longer description line of the two candidate snippets
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || !concurrentSnippetFetch) {
// too many concurrent processes
addResult(getSnippet(node, null), localEntryElement.getWeight());
success = true;
} else {
// concurrent path: the result is added asynchronously; success stays false here
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
@Override
public void run() {
// bracket the whole fetch with feeder accounting so isFeedingFinished() stays accurate
SearchEvent.this.oneFeederStarted();
try {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
} catch (final Throwable e) {} finally {
SearchEvent.this.snippetFetchAlive.decrementAndGet();
}
} catch (final Throwable e) {} finally {
SearchEvent.this.oneFeederTerminated();
}
}
}.start();
}
}
}
return success;
}
/**
* place the result to the result vector and apply post-ranking
@ -1689,7 +1718,7 @@ public final class SearchEvent {
while ( this.resultList.sizeAvailable() <= resultListIndex &&
(this.rwiQueueSize() > 0 || this.nodeStack.sizeQueue() > 0 ||
(!this.isFeedingFinished() && System.currentTimeMillis() < finishTime))) {
if (!drainStacksToResult()) {
if (!drainStacksToResult(true)) {
try {
Thread.sleep(10);
} catch (final InterruptedException e) {
@ -1856,6 +1885,65 @@ public final class SearchEvent {
}
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
}
/**
 * Re-sort results cached in the resultList and eventually include in that list
 * elements with higher ranks from the Solr and RWI stacks.
 * Guarded by {@link #resortCacheAllowed}: the permit is acquired here and never
 * released, so this operation runs at most once per search event.
 */
public void resortCachedResults() {
/*
 * If stacks feeding is finished, drain as much as possible elements from stacks
 * while their ranking is higher than the last element in the result list
 */
if (isFeedingFinished() && this.resortCacheAllowed.tryAcquire()) {
/*
 * First put all elements of the resultList in its own sorted queue to have a
 * consistent sorting on the whole set
 */
this.resultList.requeueDrainedElements();
/*
 * Note : if the resultList is full (its maxSize has been reached) some elements
 * with the lowest ranking may be lost in this next step. Not really a problem
 * because they were not supposed to be here. If really necessary to keep them,
 * growing the maxSize of the resultList should be considered here.
 */
/* NOTE(review): the loops below compare Element references with '==' ; this
 * relies on requeueDrainedElements() re-inserting the very same Element
 * instances, so identity comparison is meaningful here. */
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
/*
 * Drain stacks in two steps (Solr, then RWI), because one stack might still
 * contains higher ranked results when only lower ranked remain in the other
 */
/*
 * Here we do not fetch snippets concurrently as we want to know immediately the
 * drained element position in the final result list
 */
boolean drained = drainSolrStackToResult(false);
WeakPriorityBlockingQueue.Element<URIMetadataNode> newLastResult = this.resultList.getLastInQueue();
/*
 * Loop while at least one element has been added to the results list and is not
 * the last considering its final rank
 */
while (drained && newLastResult == initialLastResult) {
drained = drainSolrStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
}
/* NOTE(review): the RWI phase below still compares against initialLastResult
 * captured before the Solr drain; if the Solr phase already changed the queue
 * tail, the RWI while-loop exits after this single drain — confirm intended. */
drained = drainRWIStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
/*
 * Loop while at least one element has been added to the results list and is not
 * the last considering its final rank
 */
while (drained && newLastResult == initialLastResult) {
drained = drainRWIStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
}
}
}
/**
* delete a specific entry from the search results