mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) added cache for blacklisted URL hashes received via DHT. DHT does not request URLs listed in this cache.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2251 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
9f55973fe2
commit
8ba8e2b7d9
|
@ -78,6 +78,7 @@ public final class transferRWI {
|
|||
final int entryc = Integer.parseInt(post.get("entryc", "")); // number of entries in indexes
|
||||
byte[] indexes = post.get("indexes", "").getBytes(); // the indexes, as list of word entries
|
||||
boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true");
|
||||
boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
|
||||
boolean checkLimit = sb.getConfigBool("indexDistribution.dhtReceiptLimitEnabled", true);
|
||||
final long cachelimit = sb.getConfigLong("indexDistribution.dhtReceiptLimit", 1000);
|
||||
final yacySeed otherPeer = yacyCore.seedDB.get(iam);
|
||||
|
@ -152,8 +153,15 @@ public final class transferRWI {
|
|||
try {
|
||||
if ((!(unknownURL.contains(urlHash))) &&
|
||||
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
|
||||
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
|
||||
int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
|
||||
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
|
||||
//TODO: set to logFine if it works.
|
||||
}
|
||||
else {
|
||||
unknownURL.add(urlHash);
|
||||
}
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
sb.getLog().logWarning(
|
||||
"transferRWI: DB-Error while trying to determine if URL with hash '" +
|
||||
|
|
|
@ -96,7 +96,7 @@ public final class transferURL {
|
|||
lEntry = sb.urlPool.loadedURL.newEntry(urls, true);
|
||||
if ((lEntry != null) && (lEntry.url() != null)) {
|
||||
if ((blockBlacklist) &&
|
||||
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.url()))) {
|
||||
(plasmaSwitchboard.urlBlacklist.isListed(lEntry.hash(), lEntry.url()))) {
|
||||
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
|
||||
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
|
||||
lEntry = null;
|
||||
|
|
|
@ -43,11 +43,16 @@ package de.anomic.plasma;
|
|||
|
||||
import java.io.File;
|
||||
import java.net.URL;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import de.anomic.kelondro.kelondroMSetTools;
|
||||
|
||||
public class plasmaURLPattern {
|
||||
|
||||
// Synchronized set of URL hashes that isListed(String, URL) has found to be blacklisted;
// queried by hashInBlacklistedCache(String). Entries are only ever added in the code visible here.
private Set cachedUrlHashs = Collections.synchronizedSet(new HashSet());
|
||||
private File rootPath = null;
|
||||
private HashMap hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
|
||||
|
||||
|
@ -85,6 +90,22 @@ public class plasmaURLPattern {
|
|||
hostpaths.put(host.toLowerCase(), path);
|
||||
}
|
||||
|
||||
public boolean hashInBlacklistedCache(String urlHash) {
|
||||
return cachedUrlHashs.contains(urlHash);
|
||||
}
|
||||
|
||||
public boolean isListed(String urlHash, URL url) {
|
||||
if (!cachedUrlHashs.contains(urlHash)) {
|
||||
boolean temp = isListed(url.getHost().toLowerCase(), url.getFile());
|
||||
if (temp)
|
||||
{
|
||||
cachedUrlHashs.add(urlHash);
|
||||
}
|
||||
return temp;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isListed(URL url) {
|
||||
return isListed(url.getHost().toLowerCase(), url.getFile());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user