- fixed a problem loading blacklists with new yacycore.jar

- fixed badwords and stopwords initialization

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6315 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-09-15 11:46:02 +00:00
parent 80d5005044
commit 721b88efbd
4 changed files with 8 additions and 43 deletions

View File

@ -298,7 +298,6 @@ proxyYellowList=yacy.yellow
# the black-list; URLs appearing in this list will not be loaded;
# instead always a 404 is returned
# all these files will be placed in the listsPath
BlackLists.class=de.anomic.kelondro.text.DefaultBlacklist
BlackLists.Shared=url.default.black
BlackLists.DefaultList=url.default.black

View File

@ -33,9 +33,6 @@ import java.util.Map.Entry;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public class DefaultBlacklist extends AbstractBlacklist implements Blacklist {
public DefaultBlacklist(final File rootPath) {

View File

@ -91,7 +91,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Constructor;
import java.net.MalformedURLException;
import java.security.NoSuchAlgorithmException;
import java.security.PublicKey;
@ -132,6 +131,7 @@ import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.LoaderDispatcher;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.Blacklist;
import de.anomic.data.DefaultBlacklist;
import de.anomic.data.LibraryProvider;
import de.anomic.data.URLLicense;
import de.anomic.data.blogBoard;
@ -208,8 +208,8 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi
public static long lastPPMUpdate = System.currentTimeMillis()- 30000;
// colored list management
public static TreeSet<String> badwords = new TreeSet<String>();
public static TreeSet<String> stopwords = new TreeSet<String>();
public static TreeSet<String> badwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static TreeSet<String> stopwords = new TreeSet<String>(NaturalOrder.naturalComparator);
public static TreeSet<String> blueList = null;
public static TreeSet<byte[]> badwordHashes = null;
public static TreeSet<byte[]> blueListHashes = null;
@ -419,35 +419,10 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi
ppRamString(plasmaBlueListFile.length()/1024));
}
// load the black-list / inspired by [AS]
// load blacklist
this.log.logConfig("Loading blacklist ...");
final File blacklistsPath = getConfigPath(SwitchboardConstants.LISTS_PATH, SwitchboardConstants.LISTS_PATH_DEFAULT);
String[] blacklistClassName = new String[] {
getConfig(SwitchboardConstants.BLACKLIST_CLASS, SwitchboardConstants.BLACKLIST_CLASS_DEFAULT),
SwitchboardConstants.BLACKLIST_CLASS_DEFAULT
};
this.log.logConfig("Starting blacklist engine ...");
urlBlacklist = null;
for (int i = 0; i < blacklistClassName.length; i++) {
try {
final Class<?> blacklistClass = Class.forName(blacklistClassName[i]);
final Constructor<?> blacklistClassConstr = blacklistClass.getConstructor( new Class[] { File.class } );
urlBlacklist = (Blacklist) blacklistClassConstr.newInstance(new Object[] { blacklistsPath });
this.log.logFine("Used blacklist engine class: " + blacklistClassName);
this.log.logConfig("Using blacklist engine: " + urlBlacklist.getEngineInfo());
break;
} catch (final Exception e) {
continue; // try next
} catch (final Error e) {
continue; // try next
}
}
if (urlBlacklist == null) {
this.log.logSevere("Unable to load the blacklist engine");
System.exit(-1);
}
this.log.logConfig("Loading backlist data ...");
urlBlacklist = new DefaultBlacklist(blacklistsPath);
listManager.switchboard = this;
listManager.listsPath = blacklistsPath;
listManager.reloadBlacklists();

View File

@ -286,6 +286,7 @@ public final class SwitchboardConstants {
*/
public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads";
public static final String YACY_MODE_DEBUG = "yacyDebugMode";
/**
* <p><code>public static final String <strong>WORDCACHE_MAX_COUNT</strong> = "wordCacheMaxCount"</code></p>
* <p>Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the
@ -295,20 +296,13 @@ public final class SwitchboardConstants {
public static final String HTTPC_NAME_CACHE_CACHING_PATTERNS_NO = "httpc.nameCacheNoCachingPatterns";
public static final String ROBOTS_TXT = "httpd.robots.txt";
public static final String ROBOTS_TXT_DEFAULT = RobotsTxtConfig.LOCKED + "," + RobotsTxtConfig.DIRS;
/**
* <p><code>public static final String <strong>BLACKLIST_CLASS</strong> = "BlackLists.class"</code></p>
* <p>Name of the setting which Blacklist backend shall be used. Due to different requirements of users, the
* {@link de.anomic.data.Blacklist}-interface has been created to support blacklist engines different from YaCy's default</p>
* <p>Attention is required when the backend is changed, because different engines may have different syntaxes</p>
*/
public static final String BLACKLIST_CLASS = "BlackLists.class";
/**
* <p><code>public static final String <strong>BLACKLIST_CLASS_DEFAULT</strong> = "de.anomic.data.DefaultBlacklist"</code></p>
* <p>Package and name of YaCy's {@link DefaultBlacklist default} blacklist implementation</p>
*
* @see DefaultBlacklist for a detailed overview about the syntax of the default implementation
*/
public static final String BLACKLIST_CLASS_DEFAULT = "de.anomic.data.DefaultBlacklist";
public static final String LIST_BLUE = "plasmaBlueList";
public static final String LIST_BLUE_DEFAULT = null;
public static final String LIST_BADWORDS_DEFAULT = "yacy.badwords";