more seedDB helpers

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4452 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
borg-0300 2008-02-06 12:32:41 +00:00
parent 6a85764e1a
commit 9ab6ad8b73

View File

@ -141,15 +141,15 @@ public final class yacyClient {
if ((otherHash != null) && if ((otherHash != null) &&
(otherHash.length() > 0) && (otherHash.length() > 0) &&
((seed = (String) result.get("seed0")) != null)) { ((seed = (String) result.get("seed0")) != null)) {
if (seed.length() > yacySeed.maxsize) { if (seed.length() > yacySeed.maxsize) {
yacyCore.log.logInfo("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")"); yacyCore.log.logInfo("hello/client 0: rejected contacting seed; too large (" + seed.length() + " > " + yacySeed.maxsize + ")");
} else { } else {
otherPeer = yacySeed.genRemoteSeed(seed, post.get("key", ""), true); otherPeer = yacySeed.genRemoteSeed(seed, post.get("key", ""), true);
if (otherPeer == null || !otherPeer.hash.equals(otherHash)) { if (otherPeer == null || !otherPeer.hash.equals(otherHash)) {
yacyCore.log.logFine("yacyClient.publishMySeed: consistency error: other peer '" + ((otherPeer==null)?"unknown":otherPeer.getName()) + "' wrong"); yacyCore.log.logFine("yacyClient.publishMySeed: consistency error: other peer '" + ((otherPeer==null)?"unknown":otherPeer.getName()) + "' wrong");
return -1; // no success return -1; // no success
} }
otherPeerVersion = otherPeer.getVersion(); otherPeerVersion = otherPeer.getVersion();
} }
} }
@ -229,10 +229,10 @@ public final class yacyClient {
while ((seedStr = (String) result.get("seed" + i++)) != null) { while ((seedStr = (String) result.get("seed" + i++)) != null) {
// integrate new seed into own database // integrate new seed into own database
// the first seed, "seed0" is the seed of the responding peer // the first seed, "seed0" is the seed of the responding peer
if (seedStr.length() > yacySeed.maxsize) { if (seedStr.length() > yacySeed.maxsize) {
yacyCore.log.logInfo("hello/client: rejected contacting seed; too large (" + seedStr.length() + " > " + yacySeed.maxsize + ")"); yacyCore.log.logInfo("hello/client: rejected contacting seed; too large (" + seedStr.length() + " > " + yacySeed.maxsize + ")");
} else { } else {
if (yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, post.get("key", ""), true), (i == 1))) count++; if (yacyCore.peerActions.peerArrival(yacySeed.genRemoteSeed(seedStr, post.get("key", ""), true), (i == 1))) count++;
} }
} }
return count; return count;
@ -243,22 +243,23 @@ public final class yacyClient {
final serverObjects post = yacyNetwork.basicRequestPost(plasmaSwitchboard.getSwitchboard(), target.hash); final serverObjects post = yacyNetwork.basicRequestPost(plasmaSwitchboard.getSwitchboard(), target.hash);
post.put("object", "seed"); post.put("object", "seed");
post.put("env", seedHash); post.put("env", seedHash);
// send request // send request
try { try {
final HashMap<String, String> result = nxTools.table( final HashMap<String, String> result = nxTools.table(
httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null),
target.getHexHash() + ".yacyh", target.getHexHash() + ".yacyh",
8000, 10000,
null, null,
null, null,
proxyConfig(), proxyConfig(),
post, post,
null null
), "UTF-8" ), "UTF-8"
); );
if (result == null || result.size() == 0) { return null; } if (result == null || result.size() == 0) { return null; }
target.setLastSeenUTC();
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time //final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
return yacySeed.genRemoteSeed((String) result.get("response"), post.get("key", ""), true); return yacySeed.genRemoteSeed((String) result.get("response"), post.get("key", ""), true);
} catch (Exception e) { } catch (Exception e) {
@ -278,8 +279,8 @@ public final class yacyClient {
try { try {
final HashMap<String, String> result = nxTools.table( final HashMap<String, String> result = nxTools.table(
httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null),
target.getHexHash() + ".yacyh", target.getHexHash() + ".yacyh",
8000, 10000,
null, null,
null, null,
proxyConfig(), proxyConfig(),
@ -289,7 +290,15 @@ public final class yacyClient {
); );
if (result == null || result.size() == 0) { return -1; } if (result == null || result.size() == 0) { return -1; }
return Integer.parseInt((String) result.get("response")); final String resp = (String) result.get("response");
if (resp == null) {
return -1;
} else try {
target.setLastSeenUTC();
return Integer.parseInt(resp);
} catch (NumberFormatException e) {
return -1;
}
} catch (Exception e) { } catch (Exception e) {
yacyCore.log.logSevere("yacyClient.queryRWICount error:" + e.getMessage()); yacyCore.log.logSevere("yacyClient.queryRWICount error:" + e.getMessage());
return -1; return -1;
@ -299,32 +308,33 @@ public final class yacyClient {
public static int queryUrlCount(yacySeed target) { public static int queryUrlCount(yacySeed target) {
if (target == null) { return -1; } if (target == null) { return -1; }
if (yacyCore.seedDB.mySeed() == null) return -1; if (yacyCore.seedDB.mySeed() == null) return -1;
// prepare request // prepare request
final serverObjects post = yacyNetwork.basicRequestPost(plasmaSwitchboard.getSwitchboard(), target.hash); final serverObjects post = yacyNetwork.basicRequestPost(plasmaSwitchboard.getSwitchboard(), target.hash);
post.put("object", "lurlcount"); post.put("object", "lurlcount");
post.put("ttl", "0"); post.put("ttl", "0");
post.put("env", ""); post.put("env", "");
// send request // send request
try { try {
final HashMap<String, String> result = nxTools.table( final HashMap<String, String> result = nxTools.table(
httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null), httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/query.html", null),
target.getHexHash() + ".yacyh", target.getHexHash() + ".yacyh",
6000, 10000,
null, null,
null, null,
proxyConfig(), proxyConfig(),
post, post,
null null
), "UTF-8" ), "UTF-8"
); );
if ((result == null) || (result.size() == 0)) return -1; if ((result == null) || (result.size() == 0)) return -1;
final String resp = (String) result.get("response"); final String resp = (String) result.get("response");
if (resp == null) { if (resp == null) {
return -1; return -1;
} else try { } else try {
target.setLastSeenUTC();
return Integer.parseInt(resp); return Integer.parseInt(resp);
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
return -1; return -1;
@ -431,7 +441,7 @@ public final class yacyClient {
// send request // send request
HashMap<String, String> result = null; HashMap<String, String> result = null;
try { try {
result = nxTools.table( result = nxTools.table(
httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/search.html", null), httpc.wput(new yacyURL("http://" + target.getClusterAddress() + "/yacy/search.html", null),
target.getHexHash() + ".yacyh", target.getHexHash() + ".yacyh",
60000, 60000,
@ -441,7 +451,7 @@ public final class yacyClient {
post, post,
null null
), "UTF-8" ), "UTF-8"
); );
} catch (IOException e) { } catch (IOException e) {
yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(target.hash, wordhashes.substring(0, 12))); yacyCore.log.logFine("SEARCH failed FROM " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(target.hash, wordhashes.substring(0, 12)));
yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage()); yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
@ -449,37 +459,37 @@ public final class yacyClient {
} }
if ((result == null) || (result.size() == 0)) { if ((result == null) || (result.size() == 0)) {
yacyCore.log.logFine("SEARCH failed FROM " yacyCore.log.logFine("SEARCH failed FROM "
+ target.hash + target.hash
+ ":" + ":"
+ target.getName() + target.getName()
+ " (zero response), score=" + " (zero response), score="
+ target.selectscore + target.selectscore
+ ", DHTdist=" + ", DHTdist="
+ yacyDHTAction.dhtDistance(target.hash, wordhashes + yacyDHTAction.dhtDistance(target.hash, wordhashes
.substring(0, 12))); .substring(0, 12)));
return null; return null;
} }
// compute all computation times // compute all computation times
final long totalrequesttime = System.currentTimeMillis() - timestamp; final long totalrequesttime = System.currentTimeMillis() - timestamp;
// OUTPUT: // OUTPUT:
// version : application version of responder // version : application version of responder
// uptime : uptime in seconds of responder // uptime : uptime in seconds of responder
// total : number of total available LURL's for this search // total : number of total available LURL's for this search
// count : number of returned LURL's for this search // count : number of returned LURL's for this search
// resource<n> : LURL of search // resource<n> : LURL of search
// fwhop : hops (depth) of forwards that had been performed to construct this result // fwhop : hops (depth) of forwards that had been performed to construct this result
// fwsrc : peers that helped to construct this result // fwsrc : peers that helped to construct this result
// fwrec : peers that would have helped to construct this result (recommendations) // fwrec : peers that would have helped to construct this result (recommendations)
// searchtime : time that the peer actually spent to create the result // searchtime : time that the peer actually spent to create the result
// references : references (search hints) that were calculated during search // references : references (search hints) that were calculated during search
// now create a plasmaIndex out of this result // now create a plasmaIndex out of this result
// System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug // System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug
int results = 0, joincount = 0; int results = 0, joincount = 0;
try { try {
results = Integer.parseInt(result.get("count")); results = Integer.parseInt(result.get("count"));
joincount = Integer.parseInt(result.get("joincount")); joincount = Integer.parseInt(result.get("joincount"));
@ -488,74 +498,74 @@ public final class yacyClient {
yacyCore.peerActions.peerDeparture(target, "search request to peer created number format exception"); yacyCore.peerActions.peerDeparture(target, "search request to peer created number format exception");
return null; return null;
} }
// System.out.println("***result count " + results); // System.out.println("***result count " + results);
// create containers // create containers
final int words = wordhashes.length() / yacySeedDB.commonHashLength; final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words]; indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) { for (int i = 0; i < words; i++) {
container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), count); container[i] = plasmaWordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), count);
} }
// insert results to containers // insert results to containers
indexURLEntry urlEntry; indexURLEntry urlEntry;
String[] urls = new String[results]; String[] urls = new String[results];
for (int n = 0; n < results; n++) { for (int n = 0; n < results; n++) {
// get one single search result // get one single search result
urlEntry = wordIndex.loadedURL.newEntry((String) result.get("resource" + n)); urlEntry = wordIndex.loadedURL.newEntry((String) result.get("resource" + n));
if (urlEntry == null) continue; if (urlEntry == null) continue;
assert (urlEntry.hash().length() == 12) : "urlEntry.hash() = " + urlEntry.hash(); assert (urlEntry.hash().length() == 12) : "urlEntry.hash() = " + urlEntry.hash();
if (urlEntry.hash().length() != 12) continue; // bad url hash if (urlEntry.hash().length() != 12) continue; // bad url hash
indexURLEntry.Components comp = urlEntry.comp(); indexURLEntry.Components comp = urlEntry.comp();
if (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, comp.url())) { if (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, comp.url())) {
yacyCore.log.logInfo("remote search (client): filtered blacklisted url " + comp.url() + " from peer " + target.getName()); yacyCore.log.logInfo("remote search (client): filtered blacklisted url " + comp.url() + " from peer " + target.getName());
continue; // block with backlist continue; // block with backlist
} }
if (!plasmaSwitchboard.getSwitchboard().acceptURL(comp.url())) { if (!plasmaSwitchboard.getSwitchboard().acceptURL(comp.url())) {
yacyCore.log.logInfo("remote search (client): rejected url outside of our domain " + comp.url() + " from peer " + target.getName()); yacyCore.log.logInfo("remote search (client): rejected url outside of our domain " + comp.url() + " from peer " + target.getName());
continue; // reject url outside of our domain continue; // reject url outside of our domain
} }
// save the url entry // save the url entry
indexRWIEntry entry; indexRWIEntry entry;
if (urlEntry.word() == null) { if (urlEntry.word() == null) {
yacyCore.log.logWarning("remote search (client): no word attached from peer " + target.getName() + ", version " + target.getVersion()); yacyCore.log.logWarning("remote search (client): no word attached from peer " + target.getName() + ", version " + target.getVersion());
continue; // no word attached continue; // no word attached
} }
// the search-result-url transports all the attributes of word indexes // the search-result-url transports all the attributes of word indexes
entry = urlEntry.word(); entry = urlEntry.word();
if (!(entry.urlHash().equals(urlEntry.hash()))) { if (!(entry.urlHash().equals(urlEntry.hash()))) {
yacyCore.log.logInfo("remote search (client): url-hash " + urlEntry.hash() + " does not belong to word-attached-hash " + entry.urlHash() + "; url = " + comp.url() + " from peer " + target.getName()); yacyCore.log.logInfo("remote search (client): url-hash " + urlEntry.hash() + " does not belong to word-attached-hash " + entry.urlHash() + "; url = " + comp.url() + " from peer " + target.getName());
continue; // spammed continue; // spammed
} }
// passed all checks, store url // passed all checks, store url
try { try {
wordIndex.loadedURL.store(urlEntry); wordIndex.loadedURL.store(urlEntry);
wordIndex.loadedURL.stack(urlEntry, yacyCore.seedDB.mySeed().hash, target.hash, 2); wordIndex.loadedURL.stack(urlEntry, yacyCore.seedDB.mySeed().hash, target.hash, 2);
} catch (IOException e) { } catch (IOException e) {
yacyCore.log.logSevere("could not store search result", e); yacyCore.log.logSevere("could not store search result", e);
continue; // db-error continue; // db-error
} }
if (urlEntry.snippet() != null) { if (urlEntry.snippet() != null) {
// we don't store the snippets along the url entry, // we don't store the snippets along the url entry,
// because they are search-specific. // because they are search-specific.
// instead, they are placed in a snippet-search cache. // instead, they are placed in a snippet-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'"); // System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
plasmaSnippetCache.storeToCache(wordhashes, urlEntry.hash(), urlEntry.snippet()); plasmaSnippetCache.storeToCache(wordhashes, urlEntry.hash(), urlEntry.snippet());
} }
// add the url entry to the word indexes // add the url entry to the word indexes
for (int m = 0; m < words; m++) { for (int m = 0; m < words; m++) {
container[m].add(entry, System.currentTimeMillis()); container[m].add(entry, System.currentTimeMillis());
} }
// store url hash for statistics // store url hash for statistics
urls[n] = urlEntry.hash(); urls[n] = urlEntry.hash();
} }
// store remote result to local result container // store remote result to local result container
synchronized (containerCache) { synchronized (containerCache) {
@ -572,58 +582,58 @@ public final class yacyClient {
} }
} }
// read index abstract // read index abstract
if (abstractCache != null) { if (abstractCache != null) {
Iterator<Map.Entry<String, String>> i = result.entrySet().iterator(); Iterator<Map.Entry<String, String>> i = result.entrySet().iterator();
Map.Entry<String, String> entry; Map.Entry<String, String> entry;
TreeMap<String, String> singleAbstract; TreeMap<String, String> singleAbstract;
String wordhash; String wordhash;
serverByteBuffer ci; serverByteBuffer ci;
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
if (entry.getKey().startsWith("indexabstract.")) { if (entry.getKey().startsWith("indexabstract.")) {
wordhash = entry.getKey().substring(14); wordhash = entry.getKey().substring(14);
synchronized (abstractCache) { synchronized (abstractCache) {
singleAbstract = (TreeMap<String, String>) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes singleAbstract = (TreeMap<String, String>) abstractCache.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
if (singleAbstract == null) singleAbstract = new TreeMap<String, String>(); if (singleAbstract == null) singleAbstract = new TreeMap<String, String>();
ci = new serverByteBuffer(entry.getValue().getBytes()); ci = new serverByteBuffer(entry.getValue().getBytes());
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString()); //System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
indexContainer.decompressIndex(singleAbstract, ci, target.hash); indexContainer.decompressIndex(singleAbstract, ci, target.hash);
abstractCache.put(wordhash, singleAbstract); abstractCache.put(wordhash, singleAbstract);
} }
} }
} }
} }
// insert the containers to the index // insert the containers to the index
for (int m = 0; m < words; m++) { for (int m = 0; m < words; m++) {
wordIndex.addEntries(container[m], true); wordIndex.addEntries(container[m], true);
} }
// generate statistics // generate statistics
long searchtime; long searchtime;
try { try {
searchtime = Integer.parseInt((String) result.get("searchtime")); searchtime = Integer.parseInt((String) result.get("searchtime"));
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
searchtime = totalrequesttime; searchtime = totalrequesttime;
} }
yacyCore.log.logFine("SEARCH " yacyCore.log.logFine("SEARCH "
+ results + results
+ " URLS FROM " + " URLS FROM "
+ target.hash + target.hash
+ ":" + ":"
+ target.getName() + target.getName()
+ ", score=" + ", score="
+ target.selectscore + target.selectscore
+ ", DHTdist=" + ", DHTdist="
+ ((wordhashes.length() < 12) ? "void" : Double + ((wordhashes.length() < 12) ? "void" : Double
.toString(yacyDHTAction.dhtDistance(target.hash, .toString(yacyDHTAction.dhtDistance(target.hash,
wordhashes.substring(0, 12)))) wordhashes.substring(0, 12))))
+ ", searchtime=" + searchtime + ", netdelay=" + ", searchtime=" + searchtime + ", netdelay="
+ (totalrequesttime - searchtime) + ", references=" + (totalrequesttime - searchtime) + ", references="
+ result.get("references")); + result.get("references"));
return urls; return urls;
} }
public static HashMap<String, String> permissionMessage(String targetHash) { public static HashMap<String, String> permissionMessage(String targetHash) {
// ask for allowed message size and attachment size // ask for allowed message size and attachment size
@ -874,25 +884,28 @@ public final class yacyClient {
// transfer the RWI without the URLs // transfer the RWI without the URLs
HashMap<String, String> in = transferRWI(targetSeed, indexes, gzipBody, timeout); HashMap<String, String> in = transferRWI(targetSeed, indexes, gzipBody, timeout);
resultObj.put("resultTransferRWI", in); resultObj.put("resultTransferRWI", in);
if (in == null) { if (in == null) {
resultObj.put("result", "no_connection_1"); resultObj.put("result", "no_connection_1");
return resultObj; return resultObj;
} }
if (in.containsKey("indexPayloadSize")) payloadSize += Integer.parseInt(in.get("indexPayloadSize")); if (in.containsKey("indexPayloadSize")) payloadSize += Integer.parseInt(in.get("indexPayloadSize"));
String result = (String) in.get("result"); String result = (String) in.get("result");
if (result == null) { if (result == null) {
resultObj.put("result", "no_result_1"); resultObj.put("result", "no_result_1");
return resultObj; return resultObj;
} }
targetSeed.setLastSeenUTC();
if (!(result.equals("ok"))) { if (!(result.equals("ok"))) {
targetSeed.setFlagAcceptRemoteIndex(false); targetSeed.setFlagAcceptRemoteIndex(false);
yacyCore.seedDB.update(targetSeed.hash, targetSeed); yacyCore.seedDB.update(targetSeed.hash, targetSeed);
resultObj.put("result", result); resultObj.put("result", result);
return resultObj; return resultObj;
} }
// in now contains a list of unknown hashes // in now contains a list of unknown hashes
final String uhss = (String) in.get("unknownURL"); final String uhss = (String) in.get("unknownURL");
if (uhss == null) { if (uhss == null) {
@ -900,10 +913,10 @@ public final class yacyClient {
return resultObj; return resultObj;
} }
if (uhss.length() == 0) { return resultObj; } // all url's known, we are ready here if (uhss.length() == 0) { return resultObj; } // all url's known, we are ready here
final String[] uhs = uhss.split(","); final String[] uhs = uhss.split(",");
if (uhs.length == 0) { return resultObj; } // all url's known if (uhs.length == 0) { return resultObj; } // all url's known
// extract the urlCache from the result // extract the urlCache from the result
indexURLEntry[] urls = new indexURLEntry[uhs.length]; indexURLEntry[] urls = new indexURLEntry[uhs.length];
for (int i = 0; i < uhs.length; i++) { for (int i = 0; i < uhs.length; i++) {
@ -1073,8 +1086,8 @@ public final class yacyClient {
httpc.wput( httpc.wput(
new yacyURL("http://" + address + "/yacy/profile.html", null), new yacyURL("http://" + address + "/yacy/profile.html", null),
targetSeed.getHexHash() + ".yacyh", targetSeed.getHexHash() + ".yacyh",
10000, 12000,
null, null,
null, null,
proxyConfig(), proxyConfig(),
post, post,