Added a button to manually refresh sorting of p2p search results.

As a server-side oriented alternative to the JavaScript realtime
resorting feature proposed in PR #104.
The goal is the same as in that PR: having the possibility to compensate
for the network latency of the various peers' result fetching, and to obtain,
as soon as possible, a consistently ranked result set.
This commit is contained in:
luccioman 2017-08-28 19:03:51 +02:00
parent 31c99b2a8a
commit a1a0515312
5 changed files with 199 additions and 58 deletions

View File

@ -110,6 +110,17 @@ function statistics(offset, itemscount, itemsperpage, totalcount, localResourceS
}
}
/* Display the eventual button allowing to refresh the sort of cached results
* only when all feeds are terminated and when there is more than one result */
var resortCachedElement = document.getElementById("resortCached");
if(resortCachedElement != null) {
if(feedRunning) {
resortCachedElement.style.visibility = "hidden";
} else if(totalcountIntValue > 1){
resortCachedElement.style.visibility = "visible";
}
}
if (totalcountIntValue == 0) {
return;
}

View File

@ -80,6 +80,12 @@ Use the RSS search result format to add static searches to your RSS reader, if y
placeholder="#[promoteSearchPageGreeting]#" value="#[former]#" #(focus)#::autofocus="autofocus"#(/focus)# onFocus="this.select()" onclick="document.getElementById('Enter').innerHTML = 'search'"/>
<div class="input-group-btn">
<button id="Enter" name="Enter" class="btn btn-default" type="submit">search</button>
#(resortEnabled)#::
<a id="resortCached" class="btn btn-default" style="visibility: hidden;" role="button" href="#[url]#"
title="Refresh sorting. Depending on their rank, some results fetched in background may then appear on this page.">
<span class="glyphicon glyphicon-sort"></span>
</a>
#(/resortEnabled)#
</div>
</div>
<input type="hidden" name="contentdom" id="contentdom" value="#[contentdom]#" />
@ -106,7 +112,9 @@ Use the RSS search result format to add static searches to your RSS reader, if y
<div id="results"></div>
<div class="progress">
<div class="progress-bar progress-bar-info" id="progressbar" role="progressbar" aria-valuemin="0" aria-valuemax="100" style="width:0%;">
<span style="position:absolute;display:block;text-align:left;width:100%;color:black;">&nbsp;&nbsp;&nbsp;<strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)# <span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span></span>
<span style="position:absolute;display:block;text-align:left;width:85%;color:black;">&nbsp;&nbsp;&nbsp;<strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)#
<span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span>
</span>
</div>
</div>
::

View File

@ -684,7 +684,8 @@ public class yacysearch {
final long timestamp = System.currentTimeMillis();
// create a new search event
if ( SearchEventCache.getEvent(theQuery.id(false)) == null ) {
final SearchEvent cachedEvent = SearchEventCache.getEvent(theQuery.id(false));
if (cachedEvent == null) {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
startRecord = 0;
}
@ -702,6 +703,10 @@ public class yacysearch {
sb.getConfigLong(
SwitchboardConstants.REMOTESEARCH_MAXTIME_USER,
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)));
if(post.getBoolean("resortCachedResults") && cachedEvent == theSearch) {
theSearch.resortCachedResults();
}
if ( startRecord == 0 && authenticated && !stealthmode ) {
if ( modifier.sitehost != null && sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ) {
@ -822,6 +827,13 @@ public class yacysearch {
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
/* In p2p mode only, add a link allowing user to resort already drained results,
* eventually including fetched results with higher ranks from the Solr and RWI stacks */
prop.put("resortEnabled", global && !stealthmode && theSearch.resortCacheAllowed.availablePermits() > 0 ? 1 : 0);
prop.put("resortEnabled_url",
QueryParams.navurlBase(RequestHeader.FileType.HTML, theQuery, null, true).append("&startRecord=")
.append(startRecord).append("&resortCachedResults=true").toString());
// generate the search result lines; the content will be produced by another servlet
for ( int i = 0; i < theQuery.itemsPerPage(); i++ ) {

View File

@ -115,8 +115,6 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
* elements that had been on the stack cannot be put in again,
* they are checked against the drained list
* @param element the element (must have a equals() method)
* @param weight the weight of the element
* @param remove - the rating of the element that shall be removed in case that the stack has an size overflow
*/
public synchronized void put(final Element<E> element) {
// put the element on the stack
@ -170,6 +168,19 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
return element;
}
/**
 * Puts back onto the internal queue every element currently stored in the
 * drained backup list. Does nothing when no drained list is maintained.
 */
public synchronized void requeueDrainedElements() {
    if (this.drained == null) {
        return;
    }
    /* Capture the count up front: elements are removed from the tail of the
     * drained list while iterating, so its size shrinks on each step. */
    int toRequeue = this.drained.size();
    while (toRequeue-- > 0) {
        put(this.drained.remove(this.drained.size() - 1));
    }
}
/**
* remove a drained element
* @param element
@ -189,13 +200,24 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
*/
/**
 * Returns, without removing it, the element of the internal queue having
 * the smallest weight.
 * @return the head of the queue, or null when the queue is empty
 */
public synchronized Element<E> peek() {
    return this.queue.isEmpty() ? null : this.queue.first();
}
/**
 * Returns, without removing it, the element of the internal queue having
 * the highest weight.
 * @return the tail of the queue, or null when the queue is empty
 */
public synchronized Element<E> getLastInQueue() {
    return this.queue.isEmpty() ? null : this.queue.last();
}
/**
* all objects that have been returned by poll or take are stored in a back-up list

View File

@ -42,10 +42,13 @@ import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
@ -89,9 +92,9 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.RemoteSearch;
import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -104,8 +107,6 @@ import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.snippet.TextSnippet;
import net.yacy.search.snippet.TextSnippet.ResultClass;
import org.apache.solr.common.SolrDocument;
public final class SearchEvent {
private static final int max_results_rwi = 3000;
@ -254,6 +255,9 @@ public final class SearchEvent {
/** the number of peers which contributed to the remote search result */
public final AtomicInteger remote_solr_peerCount;
/** Ensure only one {@link #resortCachedResults()} operation to be performed on this search event */
public final Semaphore resortCacheAllowed;
public int getResultCount() {
return Math.max(
this.local_rwi_available.get() + this.remote_rwi_available.get() +
@ -355,6 +359,7 @@ public final class SearchEvent {
this.remote_solr_stored = new AtomicInteger(0);
this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers
this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed
this.resortCacheAllowed = new Semaphore(1);
final long start = System.currentTimeMillis();
// do a soft commit for fresh results
@ -1412,58 +1417,25 @@ public final class SearchEvent {
/**
* Adds the retrieved results (fulltext & rwi) to the result list and
* computes the text snippets
* @param concurrentSnippetFetch when true, allow starting concurrent tasks to fetch snippets when no one are already available
* @return true on adding entries to resultlist otherwise false
*/
public boolean drainStacksToResult() {
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
// we take one entry from both stacks at the same time
boolean success = false;
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
CacheStrategy.CACHEONLY,
false,
180,
false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
if (SearchEvent.this.snippetFetchAlive.get() >= 10) {
// too many concurrent processes
addResult(getSnippet(node, null), localEntryElement.getWeight());
success = true;
} else {
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
return solrSuccess || rwiSuccess;
}
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
@Override
public void run() {
SearchEvent.this.oneFeederStarted();
try {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
} catch (final Throwable e) {} finally {
SearchEvent.this.snippetFetchAlive.decrementAndGet();
}
} catch (final Throwable e) {} finally {
SearchEvent.this.oneFeederTerminated();
}
}
}.start();
}
}
}
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) {
/**
* Adds the retrieved results from local and remotes RWI to the result list and
* computes the text snippets
* @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
* @return true when an entry has been effectively added to resultlist otherwise false
*/
private boolean drainRWIStackToResult(boolean concurrentSnippetFetch) {
boolean success = false;
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus() || !concurrentSnippetFetch) {
// too many concurrent processes
final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
if (noderwi != null) {
@ -1495,7 +1467,64 @@ public final class SearchEvent {
if (SearchEvent.this.query.snippetCacheStrategy == null) t.run(); else t.start(); //no need for concurrency if there is no latency
}
return success;
}
}
/**
 * Adds the retrieved full text results from local and remote Solr instances to the
 * result list and computes the text snippets.
 * Note on the return value: on the concurrent snippet-fetch path the result is added
 * asynchronously by the spawned thread, so this method returns false in that case.
 * @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
 * @return true when an entry has been synchronously added to the result list, otherwise false
 */
private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
boolean success = false;
// pull at most one element from the Solr node stack (null when the stack is empty)
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
// a snippet was delivered with the Solr response: pick the better of Solr vs. local cache snippet
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
// local snippet computed from cache only (CacheStrategy.CACHEONLY), no network reload
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),
CacheStrategy.CACHEONLY,
false,
180,
false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
// keep the longer description line of the two candidate snippets
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight());
success = true;
} else {
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || !concurrentSnippetFetch) {
// too many concurrent processes
addResult(getSnippet(node, null), localEntryElement.getWeight());
success = true;
} else {
// concurrent path: the result is added asynchronously; success stays false here
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
@Override
public void run() {
// bracket the whole fetch with feeder accounting so isFeedingFinished() stays accurate
SearchEvent.this.oneFeederStarted();
try {
SearchEvent.this.snippetFetchAlive.incrementAndGet();
try {
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
} catch (final Throwable e) {} finally {
SearchEvent.this.snippetFetchAlive.decrementAndGet();
}
} catch (final Throwable e) {} finally {
SearchEvent.this.oneFeederTerminated();
}
}
}.start();
}
}
}
return success;
}
/**
* place the result to the result vector and apply post-ranking
@ -1689,7 +1718,7 @@ public final class SearchEvent {
while ( this.resultList.sizeAvailable() <= resultListIndex &&
(this.rwiQueueSize() > 0 || this.nodeStack.sizeQueue() > 0 ||
(!this.isFeedingFinished() && System.currentTimeMillis() < finishTime))) {
if (!drainStacksToResult()) {
if (!drainStacksToResult(true)) {
try {
Thread.sleep(10);
} catch (final InterruptedException e) {
@ -1856,6 +1885,65 @@ public final class SearchEvent {
}
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
}
/**
 * Re-sort results cached in the resultList and eventually include in that list
 * elements with higher ranks from the Solr and RWI stacks.
 * Guarded by {@link #resortCacheAllowed}: the permit is acquired here and never
 * released, so this operation runs at most once per search event.
 */
public void resortCachedResults() {
/*
 * If stacks feeding is finished, drain as much as possible elements from stacks
 * while their ranking is higher than the last element in the result list
 */
if (isFeedingFinished() && this.resortCacheAllowed.tryAcquire()) {
/*
 * First put all elements of the resultList in its own sorted queue to have a
 * consistent sorting on the whole set
 */
this.resultList.requeueDrainedElements();
/*
 * Note : if the resultList is full (its maxSize has been reached) some elements
 * with the lowest ranking may be lost in this next step. Not really a problem
 * because they were not supposed to be here. If really necessary to keep them,
 * growing the maxSize of the resultList should be considered here.
 */
/* NOTE(review): the loops below compare Element references with '==' ; this
 * relies on requeueDrainedElements() re-inserting the very same Element
 * instances, so identity comparison is meaningful here. */
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
/*
 * Drain stacks in two steps (Solr, then RWI), because one stack might still
 * contains higher ranked results when only lower ranked remain in the other
 */
/*
 * Here we do not fetch snippets concurrently as we want to know immediately the
 * drained element position in the final result list
 */
boolean drained = drainSolrStackToResult(false);
WeakPriorityBlockingQueue.Element<URIMetadataNode> newLastResult = this.resultList.getLastInQueue();
/*
 * Loop while at least one element has been added to the results list and is not
 * the last considering its final rank
 */
while (drained && newLastResult == initialLastResult) {
drained = drainSolrStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
}
/* NOTE(review): the RWI phase below still compares against initialLastResult
 * captured before the Solr drain; if the Solr phase already changed the queue
 * tail, the RWI while-loop exits after this single drain — confirm intended. */
drained = drainRWIStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
/*
 * Loop while at least one element has been added to the results list and is not
 * the last considering its final rank
 */
while (drained && newLastResult == initialLastResult) {
drained = drainRWIStackToResult(false);
newLastResult = this.resultList.getLastInQueue();
}
}
}
/**
* delete a specific entry from the search results