*) Added a cache for blacklisted URL hashes received via DHT. DHT does not request URLs listed in this cache.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2251 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
hydrox 2006-06-28 08:51:34 +00:00
parent 9f55973fe2
commit 8ba8e2b7d9
3 changed files with 31 additions and 2 deletions

View File

@ -78,6 +78,7 @@ public final class transferRWI {
final int entryc = Integer.parseInt(post.get("entryc", "")); // number of entries in indexes
byte[] indexes = post.get("indexes", "").getBytes(); // the indexes, as list of word entries
boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
boolean checkLimit = sb.getConfigBool("indexDistribution.dhtReceiptLimitEnabled", true);
final long cachelimit = sb.getConfigLong("indexDistribution.dhtReceiptLimit", 1000);
final yacySeed otherPeer = yacyCore.seedDB.get(iam);
@ -152,8 +153,15 @@ public final class transferRWI {
try {
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
//TODO: set to logFine if it works.
}
else {
unknownURL.add(urlHash);
}
}
} catch (Exception ex) {
sb.getLog().logWarning(
"transferRWI: DB-Error while trying to determine if URL with hash '" +

View File

@ -96,7 +96,7 @@ public final class transferURL {
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
if ((lEntry != null) && (lEntry.url() != null)) {
if ((blockBlacklist) &&
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.url()))) {
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.hash(), lEntry.url()))) {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;

View File

@ -43,11 +43,16 @@ package de.anomic.plasma;
import java.io.File;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import de.anomic.kelondro.kelondroMSetTools;
public class plasmaURLPattern {
private Set cachedUrlHashs = Collections.synchronizedSet(new HashSet());
private File rootPath = null;
private HashMap hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
@ -85,6 +90,22 @@ public class plasmaURLPattern {
hostpaths.put(host.toLowerCase(), path);
}
/**
 * Tells whether the given URL hash is present in the cache of URL hashes
 * kept by this object ({@code cachedUrlHashs}).
 *
 * This is a pure cache lookup: the blacklist itself is not consulted, so a
 * {@code false} result only means the hash has not been cached.
 *
 * @param urlHash the URL hash to look up
 * @return {@code true} if the hash is contained in the cache
 */
public boolean hashInBlacklistedCache(String urlHash) {
    final boolean cached = cachedUrlHashs.contains(urlHash);
    return cached;
}
/**
 * Checks whether a URL is blacklisted, using the URL hash cache as a shortcut.
 *
 * If the hash is already cached the URL is reported as listed without
 * evaluating the URL again. Otherwise the lower-cased host and the file part
 * of the URL are checked via {@code isListed(host, path)}; a positive result
 * is remembered by adding the hash to the cache.
 *
 * @param urlHash the hash identifying the URL, used as the cache key
 * @param url     the URL to check when the hash is not cached
 * @return {@code true} if the hash is cached or the URL is listed
 */
public boolean isListed(String urlHash, URL url) {
    // Fast path: a cached hash is reported as listed without a pattern check.
    if (cachedUrlHashs.contains(urlHash)) {
        return true;
    }

    final String host = url.getHost().toLowerCase();
    final boolean listed = isListed(host, url.getFile());
    if (listed) {
        // Remember the positive result so later lookups by hash succeed.
        cachedUrlHashs.add(urlHash);
    }
    return listed;
}
/**
 * Checks whether a URL is blacklisted, without touching the URL hash cache.
 *
 * The host is lower-cased and passed together with the URL's file part to
 * {@code isListed(host, path)}.
 *
 * @param url the URL to check
 * @return the result of the host/path check
 */
public boolean isListed(URL url) {
    final String host = url.getHost().toLowerCase();
    final String path = url.getFile();
    return isListed(host, path);
}