Introduction of a new remote search load control: until now, the remote search fetched 10 results per peer with a time-out of 3 seconds. Both the number of results per peer and the time-out can now be configured.

This has two aspects: the user who searches may want to increase these values to get more results, at the cost of more load on the remote side, while the operator of the server that is accessed for this search may want to restrict the load. Both sides can now be configured. The server-side maximum load parameters are defined by the network definition, and the client-side search request load can be defined by each user individually; when the remote search is performed, however, the requested service is limited to the values in the network definition.

You can now find in the network definition file:
network.unit.remotesearch.maxcount and network.unit.remotesearch.maxtime
and in the yacy.conf file:
remotesearch.maxcount and remotesearch.maxtime

There is currently no web interface to define the client-side remote search attributes; please set them manually in yacy.conf.
    

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7548 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2011-03-04 13:44:00 +00:00
parent 6dfaf6fef7
commit bed79402be
9 changed files with 60 additions and 29 deletions

View File

@ -723,6 +723,10 @@ search.result.show.metadata = true
search.result.show.parser = true
search.result.show.pictures = true
# remote search details: maximum number of results requested per peer
# and time-out for remote queries (milliseconds)
remotesearch.maxcount = 20
remotesearch.maxtime = 1000
# specifies if yacy should set its own referer if no referer URL
# was set by the client.
useYacyReferer = false

View File

@ -17,8 +17,11 @@ network.unit.description = Public YaCy Community
# global, local, any
network.unit.domain = global
# maximum search time for remote queries (deprecated)
network.unit.search.time = 4
# maximum search time for remote queries (milliseconds)
network.unit.remotesearch.maxtime = 1000
# maximum number of results per remote query
network.unit.remotesearch.maxcount = 100
# flag to switch on dht transmission
# if the dht transmission is set to 'false' then for a global

View File

@ -95,7 +95,8 @@ public final class search {
final String abstracts = post.get("abstracts", ""); // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
// final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
// final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
final int count = Math.min(100, post.getInt("count", 10)); // maximum number of wanted results
final int count = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 100), post.getInt("count", 10)); // maximum number of wanted results
final long maxtime = Math.min((int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000), post.getLong("time", 3000)); // maximum number of wanted results
final int maxdist= post.getInt("maxdist", Integer.MAX_VALUE);
final String prefer = post.get("prefer", "");
final String contentdom = post.get("contentdom", "text");
@ -295,13 +296,13 @@ public final class search {
yacyChannel.channels(yacyChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
// make event
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, count, maxtime, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
// set statistic details of search result and find best result index set
joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount();
prop.put("joincount", Integer.toString(joincount));
if (joincount != 0) {
accu = theSearch.result().completeResults(3000);
accu = theSearch.result().completeResults(maxtime);
}
if (joincount <= 0 || abstracts.length() == 0) {
prop.put("indexcount", "");

View File

@ -520,7 +520,12 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
}
final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
final SearchEvent theSearch = SearchEventCache.getEvent(
theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader,
(int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 10)),
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)),
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0),
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
try {Thread.sleep(global ? 100 : 10);} catch (InterruptedException e1) {} // wait a little time to get first results in the search
if (offset == 0) {

View File

@ -87,6 +87,8 @@ public final class SearchEvent {
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader,
final int remote_maxcount,
final long remote_maxtime,
final int burstRobinsonPercent,
final int burstMultiwordPercent) {
if (MemoryControl.available() < 1024 * 1024 * 100) SearchEventCache.cleanupEvents(true);
@ -127,7 +129,8 @@ public final class SearchEvent {
query.targetlang == null ? "" : query.targetlang,
query.sitehash == null ? "" : query.sitehash,
query.authorhash == null ? "" : query.authorhash,
query.displayResults(),
remote_maxcount,
remote_maxtime,
query.maxDistance,
query.getSegment(),
peers,
@ -139,7 +142,7 @@ public final class SearchEvent {
(query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes,
burstRobinsonPercent,
burstMultiwordPercent);
Log.logFine("SEARCH_EVENT", "STARTING " + this.primarySearchThreads.length + " THREADS TO CATCH EACH " + query.displayResults() + " URLs");
Log.logFine("SEARCH_EVENT", "STARTING " + this.primarySearchThreads.length + " THREADS TO CATCH EACH " + remote_maxcount + " URLs");
if (this.primarySearchThreads != null) {
this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
@ -388,8 +391,8 @@ public final class SearchEvent {
* @param wordhash
* @param singleAbstract // a mapping from url-hashes to a string of peer-hashes
*/
public void addAbstract(String wordhash, TreeMap<String, String> singleAbstract) {
SortedMap<String, String> oldAbstract;
public void addAbstract(String wordhash, final TreeMap<String, String> singleAbstract) {
final SortedMap<String, String> oldAbstract;
synchronized (abstractsCache) {
oldAbstract = abstractsCache.get(wordhash);
if (oldAbstract == null) {
@ -399,18 +402,18 @@ public final class SearchEvent {
}
}
// extend the abstracts in the cache: join the single abstracts
new Thread() {
public void run() {
for (final Map.Entry<String, String> oneref: singleAbstract.entrySet()) {
final String urlhash = oneref.getKey();
final String peerlistNew = oneref.getValue();
synchronized (oldAbstract) {
final String peerlistOld = oldAbstract.get(urlhash);
if (peerlistOld == null) {
oldAbstract.put(urlhash, peerlistNew);
} else {
oldAbstract.put(urlhash, peerlistOld + peerlistNew);
final String peerlistOld = oldAbstract.put(urlhash, peerlistNew);
if (peerlistOld != null) oldAbstract.put(urlhash, peerlistOld + peerlistNew);
}
}
}
}.start();
// abstractsCache.put(wordhash, oldAbstract); // put not necessary since it is sufficient to just change the value content (it stays assigned)
}
@ -520,7 +523,7 @@ public final class SearchEvent {
rankingProcess.moreFeeders(1);
checkedPeers.add(peer);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, query.getSegment(), peers, rankingProcess, peer, Switchboard.urlBlacklist,
words, urls, 6000, query.getSegment(), peers, rankingProcess, peer, Switchboard.urlBlacklist,
query.ranking, query.constraint, preselectedPeerHashes);
}

View File

@ -105,6 +105,8 @@ public class SearchEventCache {
final SortedMap<byte[], String> preselectedPeerHashes,
final boolean generateAbstracts,
final LoaderDispatcher loader,
final int remote_maxcount,
final long remote_maxtime,
final int burstRobinsonPercent,
final int burstMultiwordPercent) {
@ -128,7 +130,7 @@ public class SearchEventCache {
}
if (event == null) {
// start a new event
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, burstRobinsonPercent, burstMultiwordPercent);
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, burstRobinsonPercent, burstMultiwordPercent);
}
return event;

View File

@ -275,6 +275,12 @@ public final class SwitchboardConstants {
public static final String DHT_BURST_ROBINSON = "network.unit.dht.burst.robinson";
public static final String DHT_BURST_MULTIWORD = "network.unit.dht.burst.multiword";
public static final String REMOTESEARCH_MAXCOUNT_DEFAULT = "network.unit.remotesearch.maxcount";
public static final String REMOTESEARCH_MAXTIME_DEFAULT = "network.unit.remotesearch.maxtime";
public static final String REMOTESEARCH_MAXCOUNT_USER = "remotesearch.maxcount";
public static final String REMOTESEARCH_MAXTIME_USER = "remotesearch.maxtime";
/**
* <p><code>public static final String <strong>CRAWLER_THREADS_ACTIVE_MAX</strong> = "crawler.MaxActiveThreads"</code></p>
* <p>Name of the setting how many active crawler-threads may maximal be running on the same time</p>

View File

@ -388,6 +388,7 @@ public final class yacyClient {
final String sitehash,
final String authorhash,
final int count,
final long time,
final int maxDistance,
final boolean global,
final int partitions,
@ -420,7 +421,7 @@ public final class yacyClient {
result = new SearchResult(
yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, crypt.randomSalt()),
mySeed, wordhashes, excludehashes, urlhashes, prefer, filter, language,
sitehash, authorhash, count, maxDistance, global, partitions, target.getHexHash() + ".yacyh", target.getClusterAddress(),
sitehash, authorhash, count, time, maxDistance, global, partitions, target.getHexHash() + ".yacyh", target.getClusterAddress(),
secondarySearchSuperviser, rankingProfile, constraint);
} catch (final IOException e) {
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + ")");
@ -590,6 +591,7 @@ public final class yacyClient {
final String sitehash,
final String authorhash,
final int count,
final long time,
final int maxDistance,
final boolean global,
final int partitions,
@ -624,6 +626,7 @@ public final class yacyClient {
}
parts.put("myseed", UTF8.StringBody((mySeed == null) ? "" : mySeed.genSeedStr(key)));
parts.put("count", UTF8.StringBody(Integer.toString(Math.max(10, count))));
parts.put("time", UTF8.StringBody(Long.toString(Math.max(3000, time))));
parts.put("resource", UTF8.StringBody(((global) ? "global" : "local")));
parts.put("partitions", UTF8.StringBody(Integer.toString(partitions)));
parts.put("query", UTF8.StringBody(wordhashes));
@ -1043,6 +1046,7 @@ public final class yacyClient {
"", // sitehash,
"", // authorhash,
10, // count,
3000, // time,
1000, // maxDistance,
true, //global,
16, // partitions,

View File

@ -54,6 +54,7 @@ public class yacySearch extends Thread {
final private yacySeed targetPeer;
private int urls;
private final int count, maxDistance;
private final long time;
final private RankingProfile rankingProfile;
final private Pattern prefer, filter;
final private String language;
@ -66,7 +67,7 @@ public class yacySearch extends Thread {
final Pattern prefer, final Pattern filter,
final String language,
final String sitehash, final String authorhash,
final int count, final int maxDistance,
final int count, final long time, final int maxDistance,
final boolean global, final int partitions,
final yacySeed targetPeer,
final Segment indexSegment,
@ -97,6 +98,7 @@ public class yacySearch extends Thread {
this.targetPeer = targetPeer;
this.urls = -1;
this.count = count;
this.time = time;
this.maxDistance = maxDistance;
this.rankingProfile = rankingProfile;
this.constraint = constraint;
@ -109,12 +111,12 @@ public class yacySearch extends Thread {
peers.mySeed(),
wordhashes, excludehashes, urlhashes, prefer, filter, language,
sitehash, authorhash,
count, maxDistance, global, partitions,
count, time, maxDistance, global, partitions,
targetPeer, indexSegment, containerCache, secondarySearchSuperviser,
blacklist, rankingProfile, constraint);
if (urls >= 0) {
// urls is an array of url hashes. this is only used for log output
//yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + urls.length + " links for word hash " + wordhashes + ": " + new String(urllist));
if (urlhashes != null && urlhashes.length() > 0) yacyCore.log.logInfo("SECONDARY REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + this.urls + " links for word hash " + wordhashes);
peers.mySeed().incRI(urls);
peers.mySeed().incRU(urls);
} else {
@ -151,7 +153,7 @@ public class yacySearch extends Thread {
final Pattern prefer, final Pattern filter, String language,
final String sitehash,
final String authorhash,
final int count, final int maxDist,
final int count, long time, final int maxDist,
final Segment indexSegment,
final yacySeedDB peers,
final RankingProcess containerCache,
@ -187,7 +189,7 @@ public class yacySearch extends Thread {
searchThreads[i] = new yacySearch(
wordhashes, excludehashes, "", prefer, filter, language,
sitehash, authorhash,
count, maxDist, true, targets, targetPeers[i],
count, time, maxDist, true, targets, targetPeers[i],
indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint);
searchThreads[i].start();
} catch (OutOfMemoryError e) {
@ -200,6 +202,7 @@ public class yacySearch extends Thread {
public static yacySearch secondaryRemoteSearch(
final String wordhashes, final String urlhashes,
final long time,
final Segment indexSegment,
final yacySeedDB peers,
final RankingProcess containerCache,
@ -218,7 +221,7 @@ public class yacySearch extends Thread {
if (targetPeer == null || targetPeer.hash == null) return null;
if (clusterselection != null) targetPeer.setAlternativeAddress(clusterselection.get(targetPeer.hash.getBytes()));
final yacySearch searchThread = new yacySearch(
wordhashes, "", urlhashes, Pattern.compile(""), Pattern.compile(".*"), "", "", "", 0, 9999, true, 0, targetPeer,
wordhashes, "", urlhashes, Pattern.compile(""), Pattern.compile(".*"), "", "", "", 0, time, 9999, true, 0, targetPeer,
indexSegment, peers, containerCache, null, blacklist, rankingProfile, constraint);
searchThread.start();
return searchThread;