mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added a button to manually refresh sorting of p2p search results.
As a server-side oriented alternative to the JavaScript realtime resorting feature proposed in PR #104. The goal is the same as in that PR: to provide the possibility to compensate for the network latency of fetching results from various peers, and to obtain, as soon as possible, a consistently ranked result set.
This commit is contained in:
parent
31c99b2a8a
commit
a1a0515312
|
@ -110,6 +110,17 @@ function statistics(offset, itemscount, itemsperpage, totalcount, localResourceS
|
|||
}
|
||||
}
|
||||
|
||||
/* Display the eventual button allowing to refresh the sort of cached results
|
||||
* only when all feeds are terminated and when there is more than one result */
|
||||
var resortCachedElement = document.getElementById("resortCached");
|
||||
if(resortCachedElement != null) {
|
||||
if(feedRunning) {
|
||||
resortCachedElement.style.visibility = "hidden";
|
||||
} else if(totalcountIntValue > 1){
|
||||
resortCachedElement.style.visibility = "visible";
|
||||
}
|
||||
}
|
||||
|
||||
if (totalcountIntValue == 0) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -80,6 +80,12 @@ Use the RSS search result format to add static searches to your RSS reader, if y
|
|||
placeholder="#[promoteSearchPageGreeting]#" value="#[former]#" #(focus)#::autofocus="autofocus"#(/focus)# onFocus="this.select()" onclick="document.getElementById('Enter').innerHTML = 'search'"/>
|
||||
<div class="input-group-btn">
|
||||
<button id="Enter" name="Enter" class="btn btn-default" type="submit">search</button>
|
||||
#(resortEnabled)#::
|
||||
<a id="resortCached" class="btn btn-default" style="visibility: hidden;" class="btn btn-default" role="button" href="#[url]#"
|
||||
title="Refresh sorting. Depending on their rank, some results fetched in background may then appear on this page.">
|
||||
<span class="glyphicon glyphicon-sort"></span>
|
||||
</a>
|
||||
#(/resortEnabled)#
|
||||
</div>
|
||||
</div>
|
||||
<input type="hidden" name="contentdom" id="contentdom" value="#[contentdom]#" />
|
||||
|
@ -106,7 +112,9 @@ Use the RSS search result format to add static searches to your RSS reader, if y
|
|||
<div id="results"></div>
|
||||
<div class="progress">
|
||||
<div class="progress-bar progress-bar-info" id="progressbar" role="progressbar" aria-valuemin="0" aria-valuemax="100" style="width:0%;">
|
||||
<span style="position:absolute;display:block;text-align:left;width:100%;color:black;"> <strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)# <span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span></span>
|
||||
<span style="position:absolute;display:block;text-align:left;width:85%;color:black;"> <strong id="offset">#[offset]#</strong>-<strong id="itemscount">#[itemscount]#</strong> of <strong id="totalcount">#[totalcount]#</strong> #(globalresults)#::; (<strong id="localResourceSize">#[localResourceSize]#</strong> local, <strong id="remoteResourceSize">#[remoteResourceSize]#</strong> remote), <strong id="remoteIndexCount">#[remoteIndexCount]#</strong> from <strong id="remotePeerCount">#[remotePeerCount]#</strong> remote YaCy peers.#(/globalresults)#
|
||||
<span id="feedingStatus" style="visibility: hidden;" class="glyphicon glyphicon-transfer" title="YaCy server is fetching results from available data sources."></span>
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
::
|
||||
|
|
|
@ -684,7 +684,8 @@ public class yacysearch {
|
|||
final long timestamp = System.currentTimeMillis();
|
||||
|
||||
// create a new search event
|
||||
if ( SearchEventCache.getEvent(theQuery.id(false)) == null ) {
|
||||
final SearchEvent cachedEvent = SearchEventCache.getEvent(theQuery.id(false));
|
||||
if (cachedEvent == null) {
|
||||
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
|
||||
startRecord = 0;
|
||||
}
|
||||
|
@ -702,6 +703,10 @@ public class yacysearch {
|
|||
sb.getConfigLong(
|
||||
SwitchboardConstants.REMOTESEARCH_MAXTIME_USER,
|
||||
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)));
|
||||
|
||||
if(post.getBoolean("resortCachedResults") && cachedEvent == theSearch) {
|
||||
theSearch.resortCachedResults();
|
||||
}
|
||||
|
||||
if ( startRecord == 0 && authenticated && !stealthmode ) {
|
||||
if ( modifier.sitehost != null && sb.getConfigBool(SwitchboardConstants.HEURISTIC_SITE, false) ) {
|
||||
|
@ -822,6 +827,13 @@ public class yacysearch {
|
|||
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.remote_rwi_stored.get() + theSearch.remote_solr_stored.get(), true));
|
||||
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.remote_rwi_available.get() + theSearch.remote_solr_available.get(), true));
|
||||
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.remote_rwi_peerCount.get() + theSearch.remote_solr_peerCount.get(), true));
|
||||
|
||||
/* In p2p mode only, add a link allowing user to resort already drained results,
|
||||
* eventually including fetched results with higher ranks from the Solr and RWI stacks */
|
||||
prop.put("resortEnabled", global && !stealthmode && theSearch.resortCacheAllowed.availablePermits() > 0 ? 1 : 0);
|
||||
prop.put("resortEnabled_url",
|
||||
QueryParams.navurlBase(RequestHeader.FileType.HTML, theQuery, null, true).append("&startRecord=")
|
||||
.append(startRecord).append("&resortCachedResults=true").toString());
|
||||
|
||||
// generate the search result lines; the content will be produced by another servlet
|
||||
for ( int i = 0; i < theQuery.itemsPerPage(); i++ ) {
|
||||
|
|
|
@ -115,8 +115,6 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
|
|||
* elements that had been on the stack cannot be put in again,
|
||||
* they are checked against the drained list
|
||||
* @param element the element (must have a equals() method)
|
||||
* @param weight the weight of the element
|
||||
* @param remove - the rating of the element that shall be removed in case that the stack has an size overflow
|
||||
*/
|
||||
public synchronized void put(final Element<E> element) {
|
||||
// put the element on the stack
|
||||
|
@ -170,6 +168,19 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
|
|||
return element;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enqueue again all drained elements. Do nothing when there is no internal drained list.
|
||||
*/
|
||||
public synchronized void requeueDrainedElements() {
|
||||
if(this.drained != null) {
|
||||
final int initialDrainedSize = this.drained.size();
|
||||
for(int step = 0; step < initialDrainedSize; step++) {
|
||||
Element<E> element = this.drained.remove(this.drained.size() - 1);
|
||||
put(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* remove a drained element
|
||||
* @param element
|
||||
|
@ -189,13 +200,24 @@ public class WeakPriorityBlockingQueue<E> implements Serializable {
|
|||
*/
|
||||
|
||||
/**
|
||||
* return the element with the smallest weight, but do not remove it
|
||||
* Return the element with the smallest weight from the internal queue, but do not remove it
|
||||
* @return null if no element is on the queue or the head of the queue
|
||||
*/
|
||||
public synchronized Element<E> peek() {
|
||||
if (this.queue.isEmpty()) return null;
|
||||
return this.queue.first();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the element with the highest weight, but do not remove it
|
||||
* @return null if no element is on the queue or the tail of the queue
|
||||
*/
|
||||
public synchronized Element<E> getLastInQueue() {
|
||||
if (this.queue.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
return this.queue.last();
|
||||
}
|
||||
|
||||
/**
|
||||
* all objects that have been returned by poll or take are stored in a back-up list
|
||||
|
|
|
@ -42,10 +42,13 @@ import java.util.SortedSet;
|
|||
import java.util.TreeMap;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
|
||||
import net.yacy.cora.document.analysis.Classification;
|
||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||
|
@ -89,9 +92,9 @@ import net.yacy.kelondro.util.SetTools;
|
|||
import net.yacy.peers.RemoteSearch;
|
||||
import net.yacy.peers.SeedDB;
|
||||
import net.yacy.peers.graphics.ProfilingGraph;
|
||||
import net.yacy.repository.Blacklist.BlacklistType;
|
||||
import net.yacy.repository.FilterEngine;
|
||||
import net.yacy.repository.LoaderDispatcher;
|
||||
import net.yacy.repository.Blacklist.BlacklistType;
|
||||
import net.yacy.search.EventTracker;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
|
@ -104,8 +107,6 @@ import net.yacy.search.schema.CollectionSchema;
|
|||
import net.yacy.search.snippet.TextSnippet;
|
||||
import net.yacy.search.snippet.TextSnippet.ResultClass;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
public final class SearchEvent {
|
||||
|
||||
private static final int max_results_rwi = 3000;
|
||||
|
@ -254,6 +255,9 @@ public final class SearchEvent {
|
|||
/** the number of peers which contributed to the remote search result */
|
||||
public final AtomicInteger remote_solr_peerCount;
|
||||
|
||||
/** Ensure only one {@link #resortCachedResults()} operation to be performed on this search event */
|
||||
public final Semaphore resortCacheAllowed;
|
||||
|
||||
public int getResultCount() {
|
||||
return Math.max(
|
||||
this.local_rwi_available.get() + this.remote_rwi_available.get() +
|
||||
|
@ -355,6 +359,7 @@ public final class SearchEvent {
|
|||
this.remote_solr_stored = new AtomicInteger(0);
|
||||
this.remote_solr_available= new AtomicInteger(0); // the number of result contributions from all the remote solr peers
|
||||
this.remote_solr_peerCount= new AtomicInteger(0); // the number of remote solr peers that have contributed
|
||||
this.resortCacheAllowed = new Semaphore(1);
|
||||
final long start = System.currentTimeMillis();
|
||||
|
||||
// do a soft commit for fresh results
|
||||
|
@ -1412,58 +1417,25 @@ public final class SearchEvent {
|
|||
/**
|
||||
* Adds the retrieved results (fulltext & rwi) to the result list and
|
||||
* computes the text snippets
|
||||
* @param concurrentSnippetFetch when true, allow starting concurrent tasks to fetch snippets when no one are already available
|
||||
* @return true on adding entries to resultlist otherwise false
|
||||
*/
|
||||
public boolean drainStacksToResult() {
|
||||
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
|
||||
// we take one entry from both stacks at the same time
|
||||
boolean success = false;
|
||||
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
|
||||
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
|
||||
if (node != null) {
|
||||
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
|
||||
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
|
||||
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
|
||||
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
|
||||
final TextSnippet yacysnippet = new TextSnippet(this.loader,
|
||||
node,
|
||||
this.query.getQueryGoal().getIncludeHashes(),
|
||||
CacheStrategy.CACHEONLY,
|
||||
false,
|
||||
180,
|
||||
false);
|
||||
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
|
||||
addResult(re, localEntryElement.getWeight());
|
||||
success = true;
|
||||
} else {
|
||||
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
|
||||
if (SearchEvent.this.snippetFetchAlive.get() >= 10) {
|
||||
// too many concurrent processes
|
||||
addResult(getSnippet(node, null), localEntryElement.getWeight());
|
||||
success = true;
|
||||
} else {
|
||||
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
|
||||
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
|
||||
return solrSuccess || rwiSuccess;
|
||||
}
|
||||
|
||||
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
|
||||
@Override
|
||||
public void run() {
|
||||
SearchEvent.this.oneFeederStarted();
|
||||
try {
|
||||
SearchEvent.this.snippetFetchAlive.incrementAndGet();
|
||||
try {
|
||||
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
|
||||
} catch (final Throwable e) {} finally {
|
||||
SearchEvent.this.snippetFetchAlive.decrementAndGet();
|
||||
}
|
||||
} catch (final Throwable e) {} finally {
|
||||
SearchEvent.this.oneFeederTerminated();
|
||||
}
|
||||
}
|
||||
}.start();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus()) {
|
||||
/**
|
||||
* Adds the retrieved results from local and remotes RWI to the result list and
|
||||
* computes the text snippets
|
||||
* @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
|
||||
* @return true when an entry has been effectively added to resultlist otherwise false
|
||||
*/
|
||||
private boolean drainRWIStackToResult(boolean concurrentSnippetFetch) {
|
||||
boolean success = false;
|
||||
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || MemoryControl.shortStatus() || !concurrentSnippetFetch) {
|
||||
// too many concurrent processes
|
||||
final URIMetadataNode noderwi = pullOneFilteredFromRWI(true);
|
||||
if (noderwi != null) {
|
||||
|
@ -1495,7 +1467,64 @@ public final class SearchEvent {
|
|||
if (SearchEvent.this.query.snippetCacheStrategy == null) t.run(); else t.start(); //no need for concurrency if there is no latency
|
||||
}
|
||||
return success;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the retrieved full text results from local and remotes Solr to the result list and
|
||||
* computes the text snippets
|
||||
* @param concurrentSnippetFetch when true, allow starting a concurrent task to fetch a snippet when no one is already available
|
||||
* @return true when an entry has been effectively added to resultlist otherwise false
|
||||
*/
|
||||
private boolean drainSolrStackToResult(boolean concurrentSnippetFetch) {
|
||||
boolean success = false;
|
||||
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
|
||||
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
|
||||
if (node != null) {
|
||||
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
|
||||
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
|
||||
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
|
||||
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_CACHE, "");
|
||||
final TextSnippet yacysnippet = new TextSnippet(this.loader,
|
||||
node,
|
||||
this.query.getQueryGoal().getIncludeHashes(),
|
||||
CacheStrategy.CACHEONLY,
|
||||
false,
|
||||
180,
|
||||
false);
|
||||
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
|
||||
addResult(re, localEntryElement.getWeight());
|
||||
success = true;
|
||||
} else {
|
||||
// we don't have a snippet from solr, try to get it in our way (by reloading, if necessary)
|
||||
if (SearchEvent.this.snippetFetchAlive.get() >= 10 || !concurrentSnippetFetch) {
|
||||
// too many concurrent processes
|
||||
addResult(getSnippet(node, null), localEntryElement.getWeight());
|
||||
success = true;
|
||||
} else {
|
||||
|
||||
new Thread("SearchEvent.drainStacksToResult.getSnippet") {
|
||||
@Override
|
||||
public void run() {
|
||||
SearchEvent.this.oneFeederStarted();
|
||||
try {
|
||||
SearchEvent.this.snippetFetchAlive.incrementAndGet();
|
||||
try {
|
||||
addResult(getSnippet(node, SearchEvent.this.query.snippetCacheStrategy), localEntryElement.getWeight());
|
||||
} catch (final Throwable e) {} finally {
|
||||
SearchEvent.this.snippetFetchAlive.decrementAndGet();
|
||||
}
|
||||
} catch (final Throwable e) {} finally {
|
||||
SearchEvent.this.oneFeederTerminated();
|
||||
}
|
||||
}
|
||||
}.start();
|
||||
}
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* place the result to the result vector and apply post-ranking
|
||||
|
@ -1689,7 +1718,7 @@ public final class SearchEvent {
|
|||
while ( this.resultList.sizeAvailable() <= resultListIndex &&
|
||||
(this.rwiQueueSize() > 0 || this.nodeStack.sizeQueue() > 0 ||
|
||||
(!this.isFeedingFinished() && System.currentTimeMillis() < finishTime))) {
|
||||
if (!drainStacksToResult()) {
|
||||
if (!drainStacksToResult(true)) {
|
||||
try {
|
||||
Thread.sleep(10);
|
||||
} catch (final InterruptedException e) {
|
||||
|
@ -1856,6 +1885,65 @@ public final class SearchEvent {
|
|||
}
|
||||
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-sort results cached in the resultList and eventually include in that list
|
||||
* elements with higher ranks from the Solr and RWI stacks.
|
||||
*/
|
||||
public void resortCachedResults() {
|
||||
/*
|
||||
* If stacks feeding is finished, drain as much as possible elements from stacks
|
||||
* while their ranking is higher than the last element in the result list
|
||||
*/
|
||||
if (isFeedingFinished() && this.resortCacheAllowed.tryAcquire()) {
|
||||
/*
|
||||
* First put all elements of the resultList in its own sorted queue to have a
|
||||
* consistent sorting on the whole set
|
||||
*/
|
||||
this.resultList.requeueDrainedElements();
|
||||
|
||||
/*
|
||||
* Note : if the resultList is full (its maxSize has been reached) some elements
|
||||
* with the lowest ranking may be lost in this next step. Not really a problem
|
||||
* because they were not supposed to be here. If really necessary to keep them,
|
||||
* growing the maxSize of the resultList should be considered here.
|
||||
*/
|
||||
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
|
||||
|
||||
/*
|
||||
* Drain stacks in two steps (Solr, then RWI), because one stack might still
|
||||
* contains higher ranked results when only lower ranked remain in the other
|
||||
*/
|
||||
|
||||
/*
|
||||
* Here we do not fetch snippets concurrently as we want to know immediately the
|
||||
* drained element position in the final result list
|
||||
*/
|
||||
boolean drained = drainSolrStackToResult(false);
|
||||
WeakPriorityBlockingQueue.Element<URIMetadataNode> newLastResult = this.resultList.getLastInQueue();
|
||||
|
||||
/*
|
||||
* Loop while at least one element has been added to the results list and is not
|
||||
* the last considering its final rank
|
||||
*/
|
||||
while (drained && newLastResult == initialLastResult) {
|
||||
drained = drainSolrStackToResult(false);
|
||||
newLastResult = this.resultList.getLastInQueue();
|
||||
}
|
||||
|
||||
drained = drainRWIStackToResult(false);
|
||||
newLastResult = this.resultList.getLastInQueue();
|
||||
|
||||
/*
|
||||
* Loop while at least one element has been added to the results list and is not
|
||||
* the last considering its final rank
|
||||
*/
|
||||
while (drained && newLastResult == initialLastResult) {
|
||||
drained = drainRWIStackToResult(false);
|
||||
newLastResult = this.resultList.getLastInQueue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* delete a specific entry from the search results
|
||||
|
|
Loading…
Reference in New Issue
Block a user