yacy_search_server/source/de/anomic/data/listManager.java
fuchsi 5b0c1449e1 various fixes and cleanups for blacklist handling:
1. avoid adding duplicate file name entries in config properties for lists, 
2. correctly merge all path masks from all list files for the same host masks,
3. rewrite helper methods standard java methods for Collection transformations,
4. merged various methods with identical functionality for different Collection implementations into one,
5. minor refactoring to improve code readability.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4087 6c8d7289-2bf4-0310-a012-ef5d649a1542
2007-09-10 06:20:27 +00:00

403 lines
14 KiB
Java

// listManager.java
// -------------------------------------
// part of YACY
//
// (C) 2005, 2006 by Alexander Schier
// (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
//
// last change: $LastChangedDate$ by $LastChangedBy$
// $LastChangedRevision$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.data;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
import de.anomic.server.serverCore;
// The Naming of the functions is a bit strange...
public class listManager {
public static plasmaSwitchboard switchboard;
public static File listsPath;
/**
* Get ListSet from configuration file and return it as a unified Set.
*
* <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are
* actually disjunct (pairwise unequal) sets which themselves can be seperated
* into different subsets. E.g., there can be more than one blacklist of a type.
* A ListSet is the set of all those "lists" (subsets) of an equal type.
*
* @param setName name of the ListSet
* @return a ListSet from configuration file
*/
public static Set getListSet(String setName) {
return string2set(switchboard.getConfig(setName, ""));
}
/**
* Removes an element from a ListSet and updates the configuration file
* accordingly. If the element doesn't exist, then nothing will be changed.
*
* @param setName name of the ListSet.
* @param listName name of the element to remove from the ListSet.
*/
public static void removeFromListSet(String setName, String listName) {
Set listSet = getListSet(setName);
if (listSet.size() > 0) {
listSet.remove(listName);
switchboard.setConfig(setName, collection2string(listSet));
}
}
/**
* Adds an element to an existing ListSet. If the ListSet doesn't exist yet,
* a new one will be added. If the ListSet already contains an identical element,
* then nothing happens.
*
* The new list will be written to the configuartion file.
*
* @param setName
* @param newListName
*/
public static void updateListSet(String setName, String newListName) {
Set listSet = getListSet(setName);
listSet.add(newListName);
switchboard.setConfig(setName, collection2string(listSet));
}
/**
* @param setName ListSet in which to search for an element.
* @param listName the element to search for.
* @return <code>true</code> if the ListSet "setName" contains an element
* "listName", <code>false</code> otherwise.
*/
public static boolean listSetContains(String setName, String listName) {
Set Lists = getListSet(setName);
return Lists.contains(listName);
}
//================general Lists==================
/**
* Read lines of a file into an ArrayList.
*
* @param listFile the file
* @return the resulting array as an ArrayList
*/
public static ArrayList getListArray(File listFile){
String line;
ArrayList list = new ArrayList();
int count = 0;
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile),"UTF-8"));
while((line = br.readLine()) != null){
list.add(line);
count++;
}
br.close();
} catch(IOException e) {
// list is empty
} finally {
if (br!=null) try { br.close(); } catch (Exception e) {}
}
return list;
}
/**
* Write a String to a file (used for string representation of lists).
*
* @param listFile the file to write to
* @param out the String to write
* @return returns <code>true</code> if successful, <code>false</code> otherwise
*/
public static boolean writeList(File listFile, String out) {
BufferedWriter bw = null;
try {
bw = new BufferedWriter(new PrintWriter(new FileWriter(listFile)));
bw.write(out);
bw.close();
return true;
} catch(IOException e) {
return false;
} finally {
if (bw!=null) try { bw.close(); } catch (Exception e) {}
}
}
/**
* Write elements of an Array of Strings to a file (one element per line).
*
* @param listFile the file to write to
* @param list the Array to write
* @return returns <code>true</code> if successful, <code>false</code> otherwise
*/
public static boolean writeList(File listFile, String[] list){
StringBuffer out = new StringBuffer();
for(int i=0;i < list.length; i++){
out
.append(list[i])
.append(serverCore.crlfString);
}
return writeList(listFile, new String(out)); //(File, String)
}
// same as below
public static String getListString(String filename, boolean withcomments) {
File listFile = new File(listsPath ,filename);
return getListString(listFile, withcomments);
}
/**
* Read lines of a text file into a String, optionally ignoring comments.
*
* @param listFile the File to read from.
* @param withcomments If <code>false</code> ignore lines starting with '#'.
* @return String representation of the file content.
*/
public static String getListString(File listFile, boolean withcomments){
StringBuffer temp = new StringBuffer();
BufferedReader br = null;
try{
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile)));
temp.ensureCapacity((int) listFile.length());
// Read the List
String line = "";
while ((line = br.readLine()) != null) {
if ((!line.startsWith("#") || withcomments) || !line.equals("")) {
//temp += line + serverCore.crlfString;
temp.append(line)
.append(serverCore.crlfString);
}
}
br.close();
} catch (IOException e) {
} finally {
if (br!=null) try { br.close(); } catch (Exception e) {}
}
return new String(temp);
}
// get a Directory Listing as a String Array
public static String[] getDirListing(String dirname){
final File dir = new File(dirname);
return getDirListing(dir);
}
/**
* Read content of a directory into a String array of file names.
*
* @param dir The directory to get the file listing from. If it doesn't exist yet,
* it will be created.
* @return array of file names
*/
public static String[] getDirListing(File dir){
String[] fileListString;
File[] fileList;
if (dir != null ) {
if (!dir.exists()) {
dir.mkdir();
}
fileList = dir.listFiles();
fileListString = new String[fileList.length];
for (int i=0; i<= fileList.length-1; i++) {
fileListString[i]=fileList[i].getName();
}
return fileListString;
}
return null;
}
// same as below
public static ArrayList getDirsRecursive(File dir, String notdir){
return getDirsRecursive(dir, notdir, true);
}
/**
* Returns a List of all dirs and subdirs as File Objects
*
* Warning: untested
*/
public static ArrayList getDirsRecursive(File dir, String notdir, boolean excludeDotfiles){
final File[] dirList = dir.listFiles();
final ArrayList resultList = new ArrayList();
ArrayList recursive;
Iterator iter;
for (int i=0;i<dirList.length;i++) {
if (dirList[i].isDirectory() && (!excludeDotfiles || !dirList[i].getName().startsWith(".")) && !dirList[i].getName().equals(notdir)) {
resultList.add(dirList[i]);
recursive = getDirsRecursive(dirList[i], notdir, excludeDotfiles);
iter=recursive.iterator();
while (iter.hasNext()) {
resultList.add(iter.next());
}
}
}
return resultList;
}
//================Helper functions for collection conversion==================
/**
* Simple conversion of a Collection of Strings to a comma separated String.
* If the implementing Collection subclass guaranties an order of its elements,
* the substrings of the result will have the same order.
*
* @param col a Collection of Strings.
* @return String with elements from set separated by comma.
*/
public static String collection2string(Collection col){
StringBuffer str = new StringBuffer();
if (col != null && (col.size() > 0)) {
Iterator it = col.iterator();
str.append((String) it.next());
while(it.hasNext()) {
str.append(",").append((String) it.next());
}
}
return str.toString();
}
/**
* @see listManager#string2vector(String)
*/
public static ArrayList string2arraylist(String string){
ArrayList l;
if (string != null) {
l = new ArrayList(Arrays.asList(string.split(",")));
} else {
l = new ArrayList();
}
return l;
}
/**
* Simple conversion of a comma separated list to a unified Set.
*
* @param string list of comma separated Strings
* @return resulting Set or empty Set if string is <code>null</code>
*/
public static Set string2set(String string){
HashSet set;
if (string != null) {
set = new HashSet(Arrays.asList(string.split(",")));
} else {
set = new HashSet();
}
return set;
}
/**
* Simple conversion of a comma separated list to a Vector containing
* the order of the substrings.
*
* @param string list of comma separated Strings
* @return resulting Vector or empty Vector if string is <code>null</code>
*/
public static Vector string2vector(String string){
Vector v;
if (string != null) {
v = new Vector(Arrays.asList(string.split(",")));
} else {
v = new Vector();
}
return v;
}
//=============Blacklist specific================
/**
* Load or reload all active Blacklists
*/
public static void reloadBlacklists(){
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
for (int i=0; i < supportedBlacklistTypes.length; i++) {
blacklistFile blFile = new blacklistFile(
switchboard.getConfig(
supportedBlacklistTypes[i] + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistTypes[i]);
blacklistFiles.add(blFile);
}
plasmaSwitchboard.urlBlacklist.clear();
plasmaSwitchboard.urlBlacklist.loadList(
(blacklistFile[])blacklistFiles.toArray(new blacklistFile[blacklistFiles.size()]),
"/");
// switchboard.urlBlacklist.clear();
// if (f != "") switchboard.urlBlacklist.loadLists("black", f, "/");
}
}