mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Surftips Blacklist
The list of blacklist types is now hardcoded instead of only being updated on first start / by migration.java. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3788 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
051a65f7af
commit
d1e1580223
|
@ -63,6 +63,7 @@ import java.util.regex.PatternSyntaxException;
|
||||||
import de.anomic.data.listManager;
|
import de.anomic.data.listManager;
|
||||||
import de.anomic.http.httpHeader;
|
import de.anomic.http.httpHeader;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.abstractURLPattern;
|
||||||
import de.anomic.plasma.urlPattern.defaultURLPattern;
|
import de.anomic.plasma.urlPattern.defaultURLPattern;
|
||||||
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
|
@ -96,7 +97,7 @@ public class BlacklistCleaner_p {
|
||||||
String blacklistToUse = null;
|
String blacklistToUse = null;
|
||||||
|
|
||||||
// getting the list of supported blacklist types
|
// getting the list of supported blacklist types
|
||||||
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
|
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
|
||||||
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
||||||
|
|
||||||
if (post == null) {
|
if (post == null) {
|
||||||
|
|
|
@ -60,6 +60,7 @@ import java.util.TreeMap;
|
||||||
import de.anomic.data.listManager;
|
import de.anomic.data.listManager;
|
||||||
import de.anomic.http.httpHeader;
|
import de.anomic.http.httpHeader;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.abstractURLPattern;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
import de.anomic.server.serverSwitch;
|
import de.anomic.server.serverSwitch;
|
||||||
import de.anomic.yacy.yacyCore;
|
import de.anomic.yacy.yacyCore;
|
||||||
|
@ -77,7 +78,7 @@ public class Blacklist_p {
|
||||||
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
|
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
|
||||||
|
|
||||||
// getting the list of supported blacklist types
|
// getting the list of supported blacklist types
|
||||||
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
|
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
|
||||||
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
||||||
|
|
||||||
String blacklistToUse = null;
|
String blacklistToUse = null;
|
||||||
|
|
|
@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroRotateIterator;
|
||||||
import de.anomic.net.URL;
|
import de.anomic.net.URL;
|
||||||
import de.anomic.plasma.plasmaCondenser;
|
import de.anomic.plasma.plasmaCondenser;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.abstractURLPattern;
|
||||||
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
import de.anomic.server.serverSwitch;
|
import de.anomic.server.serverSwitch;
|
||||||
|
@ -384,7 +385,7 @@ public class IndexControl_p {
|
||||||
if (post.containsKey("blacklistdomains")) {
|
if (post.containsKey("blacklistdomains")) {
|
||||||
PrintWriter pw;
|
PrintWriter pw;
|
||||||
try {
|
try {
|
||||||
String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
|
String[] supportedBlacklistTypes = abstractURLPattern.BLACKLIST_TYPES_STRING.split(",");
|
||||||
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
|
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
|
||||||
URL url;
|
URL url;
|
||||||
for (int i=0; i<urlx.length; i++) {
|
for (int i=0; i<urlx.length; i++) {
|
||||||
|
|
|
@ -26,16 +26,19 @@
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import de.anomic.http.httpHeader;
|
import de.anomic.http.httpHeader;
|
||||||
|
import de.anomic.net.URL;
|
||||||
import de.anomic.plasma.plasmaURL;
|
import de.anomic.plasma.plasmaURL;
|
||||||
import de.anomic.kelondro.kelondroMScoreCluster;
|
import de.anomic.kelondro.kelondroMScoreCluster;
|
||||||
import de.anomic.kelondro.kelondroRow;
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.plasmaURLPattern;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
import de.anomic.server.serverSwitch;
|
import de.anomic.server.serverSwitch;
|
||||||
import de.anomic.tools.crypt;
|
import de.anomic.tools.crypt;
|
||||||
|
@ -129,6 +132,10 @@ public class Surftips {
|
||||||
if (row == null) continue;
|
if (row == null) continue;
|
||||||
|
|
||||||
url = row.getColString(0, null);
|
url = row.getColString(0, null);
|
||||||
|
try{
|
||||||
|
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url)))
|
||||||
|
continue;
|
||||||
|
}catch(MalformedURLException e){continue;};
|
||||||
title = row.getColString(1,"UTF-8");
|
title = row.getColString(1,"UTF-8");
|
||||||
description = row.getColString(2,"UTF-8");
|
description = row.getColString(2,"UTF-8");
|
||||||
if ((url == null) || (title == null) || (description == null)) continue;
|
if ((url == null) || (title == null) || (description == null)) continue;
|
||||||
|
|
|
@ -60,6 +60,7 @@ import de.anomic.http.httpHeader;
|
||||||
import de.anomic.http.httpc;
|
import de.anomic.http.httpc;
|
||||||
import de.anomic.net.URL;
|
import de.anomic.net.URL;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.abstractURLPattern;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
import de.anomic.server.serverSwitch;
|
import de.anomic.server.serverSwitch;
|
||||||
import de.anomic.tools.nxTools;
|
import de.anomic.tools.nxTools;
|
||||||
|
@ -206,7 +207,7 @@ public class sharedBlacklist_p {
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
if (plasmaSwitchboard.urlBlacklist != null) {
|
if (plasmaSwitchboard.urlBlacklist != null) {
|
||||||
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
|
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
|
||||||
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
||||||
|
|
||||||
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
|
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
|
||||||
|
|
|
@ -55,7 +55,10 @@ import java.util.HashSet;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Vector;
|
import java.util.Vector;
|
||||||
|
|
||||||
|
import com.sun.tools.javac.comp.Env;
|
||||||
|
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.urlPattern.abstractURLPattern;
|
||||||
import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
|
import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
|
||||||
import de.anomic.server.serverCore;
|
import de.anomic.server.serverCore;
|
||||||
|
|
||||||
|
@ -332,13 +335,14 @@ public class listManager {
|
||||||
|
|
||||||
// load all active Blacklists in the Proxy
|
// load all active Blacklists in the Proxy
|
||||||
public static void reloadBlacklists(){
|
public static void reloadBlacklists(){
|
||||||
String supportedBlacklistTypesStr = switchboard.getConfig("BlackLists.types", "");
|
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
|
||||||
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
||||||
|
|
||||||
ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
|
ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
|
||||||
for (int i=0; i < supportedBlacklistTypes.length; i++) {
|
for (int i=0; i < supportedBlacklistTypes.length; i++) {
|
||||||
blacklistFile blFile = new blacklistFile(
|
blacklistFile blFile = new blacklistFile(
|
||||||
switchboard.getConfig(supportedBlacklistTypes[i] + ".BlackLists", ""),
|
switchboard.getConfig(
|
||||||
|
supportedBlacklistTypes[i] + ".BlackLists", switchboard.getConfig("lackLists.DefaultList", "url.default.black")),
|
||||||
supportedBlacklistTypes[i]);
|
supportedBlacklistTypes[i]);
|
||||||
blacklistFiles.add(blFile);
|
blacklistFiles.add(blFile);
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,8 +63,10 @@ public abstract class abstractURLPattern implements plasmaURLPattern {
|
||||||
plasmaURLPattern.BLACKLIST_CRAWLER,
|
plasmaURLPattern.BLACKLIST_CRAWLER,
|
||||||
plasmaURLPattern.BLACKLIST_PROXY,
|
plasmaURLPattern.BLACKLIST_PROXY,
|
||||||
plasmaURLPattern.BLACKLIST_DHT,
|
plasmaURLPattern.BLACKLIST_DHT,
|
||||||
plasmaURLPattern.BLACKLIST_SEARCH
|
plasmaURLPattern.BLACKLIST_SEARCH,
|
||||||
|
plasmaURLPattern.BLACKLIST_SURFTIPS
|
||||||
}));
|
}));
|
||||||
|
public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips";
|
||||||
|
|
||||||
protected File blacklistRootPath = null;
|
protected File blacklistRootPath = null;
|
||||||
protected HashMap cachedUrlHashs = null;
|
protected HashMap cachedUrlHashs = null;
|
||||||
|
|
|
@ -10,6 +10,7 @@ public interface plasmaURLPattern {
|
||||||
public static final String BLACKLIST_CRAWLER = "crawler";
|
public static final String BLACKLIST_CRAWLER = "crawler";
|
||||||
public static final String BLACKLIST_PROXY = "proxy";
|
public static final String BLACKLIST_PROXY = "proxy";
|
||||||
public static final String BLACKLIST_SEARCH = "search";
|
public static final String BLACKLIST_SEARCH = "search";
|
||||||
|
public static final String BLACKLIST_SURFTIPS = "surftips";
|
||||||
|
|
||||||
public static final class blacklistFile {
|
public static final class blacklistFile {
|
||||||
|
|
||||||
|
|
|
@ -251,8 +251,10 @@ public class migration {
|
||||||
sb.setConfig("crawler.BlackLists", value);
|
sb.setConfig("crawler.BlackLists", value);
|
||||||
sb.setConfig("dht.BlackLists", value);
|
sb.setConfig("dht.BlackLists", value);
|
||||||
sb.setConfig("search.BlackLists", value);
|
sb.setConfig("search.BlackLists", value);
|
||||||
|
sb.setConfig("surftips.BlackLists", value);
|
||||||
|
|
||||||
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
|
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
|
||||||
|
sb.setConfig("proxyBlackListsActive", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
// migration of http specific crawler settings
|
// migration of http specific crawler settings
|
||||||
|
|
|
@ -248,13 +248,8 @@ proxyYellowList=yacy.yellow
|
||||||
# instead always a 404 is returned
|
# instead always a 404 is returned
|
||||||
# all these files will be placed in the listsPath
|
# all these files will be placed in the listsPath
|
||||||
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
|
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
|
||||||
BlackLists.types=proxy,crawler,dht,search
|
|
||||||
BlackLists.Shared=url.default.black
|
BlackLists.Shared=url.default.black
|
||||||
|
BlackLists.DefaultList=url.default.black
|
||||||
proxy.BlackLists=url.default.black
|
|
||||||
crawler.BlackLists=url.default.black
|
|
||||||
dht.BlackLists=url.default.black
|
|
||||||
search.BlackLists=url.default.black
|
|
||||||
|
|
||||||
proxyCookieBlackList=cookie.default.black
|
proxyCookieBlackList=cookie.default.black
|
||||||
proxyCookieWhiteList=cookie.default.black
|
proxyCookieWhiteList=cookie.default.black
|
||||||
|
|
Loading…
Reference in New Issue
Block a user