yacy_search_server/htroot/sharedBlacklist_p.java
orbiter 1d8d51075c refactoring:
- removed the plasma package. The name of that package came from a very early pre-version of YaCy, even before YaCy was named AnomicHTTPProxy. The Proxy project introduced search for cache contents using class files that had been developed during the plasma project. Information from 2002 about plasma can be found here:
http://web.archive.org/web/20020802110827/http://anomic.de/AnomicPlasma/index.html
We still have one class that comes mostly unchanged from the plasma project, the Condenser class. But this is now part of the document package and all other classes in the plasma package can be assigned to other packages.
- cleaned up the http package: better structure of that class and clean isolation of server and client classes. The old HTCache becomes part of the client sub-package of http.
- because the plasmaSwitchboard is now part of the search package all servlets had to be touched to declare a different package source.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6232 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-07-19 20:37:44 +00:00

337 lines
16 KiB
Java

//sharedBlacklist_p.java
//-----------------------
//part of the AnomicHTTPProxy
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//This File is contributed by Alexander Schier
//$LastChangedDate$
//$LastChangedRevision$
//$LastChangedBy$
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//You must compile this file with
//javac -classpath .:../Classes sharedBlacklist_p.java
//if the shell's current path is HTROOT
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.AbstractBlacklist;
import de.anomic.data.listManager;
import de.anomic.data.list.ListAccumulator;
import de.anomic.data.list.XMLBlacklistImporter;
import de.anomic.document.parser.html.CharacterCoding;
import de.anomic.http.client.Client;
import de.anomic.http.metadata.HeaderFramework;
import de.anomic.http.metadata.RequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyURL;
import org.xml.sax.SAXException;
/**
 * Request handler that imports blacklist entries from another peer, from a
 * URL or from an uploaded (plain text or XML) file, presents the entries that
 * were found, and appends the entries the user selected to a blacklist file
 * inside the configured lists directory.
 */
public class sharedBlacklist_p {

    public static final int STATUS_NONE = 0;
    public static final int STATUS_ENTRIES_ADDED = 1;
    public static final int STATUS_FILE_ERROR = 2;
    public static final int STATUS_PEER_UNKNOWN = 3;
    public static final int STATUS_URL_PROBLEM = 4;
    public static final int STATUS_WRONG_INVOCATION = 5;
    public static final int STATUS_PARSE_ERROR = 6;

    private final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";

    /**
     * Handles one request.
     *
     * Depending on which key is present in <code>post</code> the method
     * downloads a blacklist from a peer ("hash"), downloads it from a URL
     * ("url"), reads it from an uploaded file ("file", optionally with
     * "type" = "xml") or appends previously listed entries to the selected
     * blacklist ("add").
     *
     * @param header the request header; not used by this method
     * @param post   the request parameters; <code>null</code> is answered
     *               with {@link #STATUS_WRONG_INVOCATION}
     * @param env    the server environment; must be a {@link Switchboard}
     * @return the properties that accumulate the template replacements
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;

        // return variable that accumulates replacements
        final serverObjects prop = new serverObjects();

        // name of the destination blacklist
        final String selectedBlacklistName;
        if (post != null && post.containsKey("currentBlacklist")) {
            selectedBlacklistName = post.get("currentBlacklist");
        } else {
            selectedBlacklistName = "shared.black";
        }
        prop.putHTML("currentBlacklist", selectedBlacklistName);
        prop.putHTML("page_target", selectedBlacklistName);

        // The list name comes straight from the request and is later joined
        // with the lists directory to build a file that is read and appended
        // to. Refuse names that could point outside of that directory.
        if (post == null || !isSafeListName(selectedBlacklistName)) {
            prop.put("page", "1");
            prop.put("status", STATUS_WRONG_INVOCATION);
            return prop;
        }

        // initialize the list manager
        listManager.switchboard = sb;
        listManager.listsPath = new File(sb.getRootPath(), sb.getConfig("listManager.listsPath", "DATA/LISTS"));

        // list all blacklist files located in the lists directory
        final List<String> dirlist = listManager.getDirListing(listManager.listsPath, BLACKLIST_FILENAME_FILTER);
        int blacklistCount = 0;
        if (dirlist != null) {
            for (final String element : dirlist) {
                prop.putXML("page_blackLists_" + blacklistCount + "_name", element);
                blacklistCount++;
            }
        }
        prop.put("page_blackLists", blacklistCount);

        List<String> otherBlacklist = null;      // entries of a flat list
        ListAccumulator otherBlacklists = null;  // entries of an XML import

        if (post.containsKey("hash")) {
            otherBlacklist = downloadFromPeer(sb, post.get("hash"), prop);
        } else if (post.containsKey("url")) {
            otherBlacklist = downloadFromUrl(post.get("url"), prop);
        } else if (post.containsKey("file")) {
            /* ======================================================
             * Import the blacklist from an uploaded XML or text file
             * ====================================================== */
            final boolean xml = post.containsKey("type") && post.get("type").equalsIgnoreCase("xml");
            prop.putHTML("page_source", post.get("file"));
            final String fileString = post.get("file$file");
            if (fileString != null) {
                try {
                    if (xml) {
                        otherBlacklists = new XMLBlacklistImporter().parse(new StringReader(fileString));
                    } else {
                        otherBlacklist = FileUtils.strings(fileString.getBytes("UTF-8"), "UTF-8");
                    }
                } catch (IOException ex) {
                    prop.put("status", STATUS_FILE_ERROR);
                } catch (SAXException ex) {
                    prop.put("status", STATUS_PARSE_ERROR);
                }
            }
        } else if (post.containsKey("add")) {
            addEntries(post, prop, selectedBlacklistName);
            return prop;
        }

        // generate the html list
        if (otherBlacklist != null) {
            // current content of the destination list; entries already in it are not offered again
            final HashSet<String> existing = new HashSet<String>(listManager.getListArray(new File(listManager.listsPath, selectedBlacklistName)));

            final String[] sortedlist = otherBlacklist.toArray(new String[otherBlacklist.size()]);
            Arrays.sort(sortedlist);

            int count = 0;
            for (final String entry : sortedlist) {
                if (!existing.contains(entry) && !entry.equals("")) {
                    putUrlListEntry(prop, count, entry);
                    count++;
                }
            }
            prop.put("page_urllist", count);
            prop.put("num", count);
            prop.put("page", "0");
        } else if (otherBlacklists != null) {
            final List<List<String>> entries = otherBlacklists.getEntryLists();

            // the row counter runs across all lists of the import
            int count = 0;
            for (final List<String> list : entries) {
                final String[] sortedlist = list.toArray(new String[list.size()]);
                Arrays.sort(sortedlist);
                for (final String entry : sortedlist) {
                    if (!entry.equals("")) {
                        putUrlListEntry(prop, count, entry);
                        count++;
                    }
                }
            }
            prop.put("page_urllist", count);
            prop.put("num", count);
            prop.put("page", "0");
        }
        return prop;
    }

    /**
     * Tells whether a request-supplied list name can be joined with the lists
     * directory without leaving it: it must not contain a path separator or a
     * NUL character and must not be the parent directory reference.
     */
    private static boolean isSafeListName(final String name) {
        if (name == null || name.equals("..")) {
            return false;
        }
        return name.indexOf('/') < 0 && name.indexOf('\\') < 0 && name.indexOf('\0') < 0;
    }

    /** Reports that the peer with the given hash could not be used as a source. */
    private static void putPeerUnknown(final serverObjects prop, final String hash) {
        prop.put("status", STATUS_PEER_UNKNOWN); // YaCy-Peer not found
        prop.putHTML("status_name", hash);
        prop.put("page", "1");
    }

    /** Writes one row (stripe flag, url, running number) of the list of importable entries. */
    private static void putUrlListEntry(final serverObjects prop, final int index, final String url) {
        final String prefix = "page_urllist_" + index;
        prop.put(prefix + "_dark", index % 2 == 0 ? "0" : "1");
        prop.putHTML(prefix + "_url", url);
        prop.put(prefix + "_count", index);
    }

    /**
     * Downloads the blacklist of the connected peer with the given hash.
     *
     * @return the downloaded lines, or <code>null</code> if the peer is not
     *         known or the download failed (the error is then stored in prop)
     */
    private static List<String> downloadFromPeer(final Switchboard sb, final String hash, final serverObjects prop) {
        // sb.peers may not be available; treat that like an unknown peer
        final yacySeed seed = (sb.peers == null) ? null : sb.peers.getConnected(hash);
        if (seed == null) {
            putPeerUnknown(prop, hash);
            return null;
        }

        final String ip = seed.getIP();
        final String port = seed.get(yacySeed.PORT, "8080");
        final String peerName = seed.get(yacySeed.NAME, "<" + ip + ":" + port + ">");
        prop.putHTML("page_source", peerName);
        final String downloadURL = "http://" + ip + ":" + port + "/yacy/list.html?col=black";

        try {
            final RequestHeader reqHeader = new RequestHeader();
            reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
            reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
            reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);

            final yacyURL u = new yacyURL(downloadURL, null);
            return FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000), "UTF-8");
        } catch (final Exception e) {
            // any failure while loading is reported the same way as an unknown peer
            putPeerUnknown(prop, hash);
            return null;
        }
    }

    /**
     * Downloads a blacklist from the given URL.
     *
     * @return the downloaded lines, or <code>null</code> if the URL could not
     *         be loaded (the error is then stored in prop)
     */
    private static List<String> downloadFromUrl(final String downloadURL, final serverObjects prop) {
        prop.putHTML("page_source", downloadURL);
        try {
            final yacyURL u = new yacyURL(downloadURL, null);
            final RequestHeader reqHeader = new RequestHeader();
            reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
            return FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000), "UTF-8");
        } catch (final Exception e) {
            prop.put("status", STATUS_URL_PROBLEM);
            prop.putHTML("status_address", downloadURL);
            prop.put("page", "1");
            return null;
        }
    }

    /**
     * Appends the posted entries "item0" .. "item(num-1)" to the selected
     * blacklist file, registers them with the active blacklist and sets the
     * redirect back to the blacklist page.
     */
    private static void addEntries(final serverObjects post, final serverObjects prop, final String selectedBlacklistName) {
        prop.put("page", "1");                    // result page
        prop.put("status", STATUS_ENTRIES_ADDED); // list of added entries

        PrintWriter pw = null;
        try {
            // open the blacklist file for appending
            pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, selectedBlacklistName), true));

            final int num = Integer.parseInt(post.get("num"));
            final String[] supportedBlacklistTypes = AbstractBlacklist.BLACKLIST_TYPES_STRING.split(",");

            for (int i = 0; i < num; i++) {
                if (!post.containsKey("item" + i)) {
                    continue;
                }
                String newItem = post.get("item" + i);

                // This should not be needed...
                if (newItem.startsWith("http://")) {
                    newItem = newItem.substring(7);
                }

                // separate the item into host and path
                int pos = newItem.indexOf("/");
                if (pos < 0) {
                    // no path given: add the default pattern that matches every path
                    pos = newItem.length();
                    newItem = newItem + "/.*";
                }

                // append the item to the file
                pw.println(newItem);

                // make the new entry effective for every blacklist type that uses this file
                if (Switchboard.urlBlacklist != null) {
                    for (final String type : supportedBlacklistTypes) {
                        if (listManager.listSetContains(type + ".BlackLists", selectedBlacklistName)) {
                            Switchboard.urlBlacklist.add(type, newItem.substring(0, pos), newItem.substring(pos + 1));
                        }
                    }
                }
            }

            // PrintWriter never throws on write errors, it only records them;
            // surface a failed write instead of silently reporting success
            if (pw.checkError()) {
                throw new IOException("could not write to blacklist file " + selectedBlacklistName);
            }
        } catch (final Exception e) {
            // NOTE(review): "1" has the same value as STATUS_ENTRIES_ADDED; kept as is because the
            // template is not visible here - confirm whether STATUS_FILE_ERROR was intended
            prop.put("status", "1");
            prop.putHTML("status_error", e.getLocalizedMessage());
        } finally {
            if (pw != null) {
                pw.close();
            }
        }

        /* unable to use prop.putHTML() or prop.putXML() here because they
         * turn the ampersand into &amp; which renders the parameters
         * useless (at least when using Opera 9.53, haven't tested other browsers)
         */
        prop.put("LOCATION", "Blacklist_p.html?selectedListName=" + CharacterCoding.unicode2html(selectedBlacklistName, true) + "&selectList=select");
    }
}