Surftips Blacklist

The list of blacklist types is now hard-coded instead of only being updated on first start / in migration.java.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3788 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
allo 2007-06-04 15:36:10 +00:00
parent 051a65f7af
commit d1e1580223
10 changed files with 28 additions and 13 deletions

View File

@ -63,6 +63,7 @@ import java.util.regex.PatternSyntaxException;
import de.anomic.data.listManager; import de.anomic.data.listManager;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.defaultURLPattern; import de.anomic.plasma.urlPattern.defaultURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -96,7 +97,7 @@ public class BlacklistCleaner_p {
String blacklistToUse = null; String blacklistToUse = null;
// getting the list of supported blacklist types // getting the list of supported blacklist types
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", ""); String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
if (post == null) { if (post == null) {

View File

@ -60,6 +60,7 @@ import java.util.TreeMap;
import de.anomic.data.listManager; import de.anomic.data.listManager;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
@ -77,7 +78,7 @@ public class Blacklist_p {
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// getting the list of supported blacklist types // getting the list of supported blacklist types
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", ""); String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
String blacklistToUse = null; String blacklistToUse = null;

View File

@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroRotateIterator;
import de.anomic.net.URL; import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern; import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -384,7 +385,7 @@ public class IndexControl_p {
if (post.containsKey("blacklistdomains")) { if (post.containsKey("blacklistdomains")) {
PrintWriter pw; PrintWriter pw;
try { try {
String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(","); String[] supportedBlacklistTypes = abstractURLPattern.BLACKLIST_TYPES_STRING.split(",");
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true)); pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
URL url; URL url;
for (int i=0; i<urlx.length; i++) { for (int i=0; i<urlx.length; i++) {

View File

@ -26,16 +26,19 @@
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaURL;
import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt; import de.anomic.tools.crypt;
@ -129,6 +132,10 @@ public class Surftips {
if (row == null) continue; if (row == null) continue;
url = row.getColString(0, null); url = row.getColString(0, null);
try{
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url)))
continue;
}catch(MalformedURLException e){continue;};
title = row.getColString(1,"UTF-8"); title = row.getColString(1,"UTF-8");
description = row.getColString(2,"UTF-8"); description = row.getColString(2,"UTF-8");
if ((url == null) || (title == null) || (description == null)) continue; if ((url == null) || (title == null) || (description == null)) continue;

View File

@ -60,6 +60,7 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpc; import de.anomic.http.httpc;
import de.anomic.net.URL; import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools; import de.anomic.tools.nxTools;
@ -206,7 +207,7 @@ public class sharedBlacklist_p {
count++; count++;
if (plasmaSwitchboard.urlBlacklist != null) { if (plasmaSwitchboard.urlBlacklist != null) {
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", ""); String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {

View File

@ -55,7 +55,10 @@ import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Vector; import java.util.Vector;
import com.sun.tools.javac.comp.Env;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile; import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
@ -332,13 +335,14 @@ public class listManager {
// load all active Blacklists in the Proxy // load all active Blacklists in the Proxy
public static void reloadBlacklists(){ public static void reloadBlacklists(){
String supportedBlacklistTypesStr = switchboard.getConfig("BlackLists.types", ""); String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length); ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
for (int i=0; i < supportedBlacklistTypes.length; i++) { for (int i=0; i < supportedBlacklistTypes.length; i++) {
blacklistFile blFile = new blacklistFile( blacklistFile blFile = new blacklistFile(
switchboard.getConfig(supportedBlacklistTypes[i] + ".BlackLists", ""), switchboard.getConfig(
supportedBlacklistTypes[i] + ".BlackLists", switchboard.getConfig("lackLists.DefaultList", "url.default.black")),
supportedBlacklistTypes[i]); supportedBlacklistTypes[i]);
blacklistFiles.add(blFile); blacklistFiles.add(blFile);
} }

View File

@ -63,8 +63,10 @@ public abstract class abstractURLPattern implements plasmaURLPattern {
plasmaURLPattern.BLACKLIST_CRAWLER, plasmaURLPattern.BLACKLIST_CRAWLER,
plasmaURLPattern.BLACKLIST_PROXY, plasmaURLPattern.BLACKLIST_PROXY,
plasmaURLPattern.BLACKLIST_DHT, plasmaURLPattern.BLACKLIST_DHT,
plasmaURLPattern.BLACKLIST_SEARCH plasmaURLPattern.BLACKLIST_SEARCH,
plasmaURLPattern.BLACKLIST_SURFTIPS
})); }));
public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips";
protected File blacklistRootPath = null; protected File blacklistRootPath = null;
protected HashMap cachedUrlHashs = null; protected HashMap cachedUrlHashs = null;

View File

@ -10,6 +10,7 @@ public interface plasmaURLPattern {
public static final String BLACKLIST_CRAWLER = "crawler"; public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy"; public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search"; public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_SURFTIPS = "surftips";
public static final class blacklistFile { public static final class blacklistFile {

View File

@ -251,8 +251,10 @@ public class migration {
sb.setConfig("crawler.BlackLists", value); sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value); sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value); sb.setConfig("search.BlackLists", value);
sb.setConfig("surftips.BlackLists", value);
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared","")); sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
sb.setConfig("proxyBlackListsActive", "");
} }
// migration of http specific crawler settings // migration of http specific crawler settings

View File

@ -248,13 +248,8 @@ proxyYellowList=yacy.yellow
# instead always a 404 is returned # instead always a 404 is returned
# all these files will be placed in the listsPath # all these files will be placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.types=proxy,crawler,dht,search
BlackLists.Shared=url.default.black BlackLists.Shared=url.default.black
BlackLists.DefaultList=url.default.black
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
proxyCookieBlackList=cookie.default.black proxyCookieBlackList=cookie.default.black
proxyCookieWhiteList=cookie.default.black proxyCookieWhiteList=cookie.default.black