adapted for isListed()

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1942 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
borg-0300 2006-03-21 20:55:59 +00:00
parent 8b14a0c833
commit 77f3237de3

View File

@ -1,12 +1,13 @@
// httpdProxyHandler.java // httpdProxyHandler.java
// ----------------------- // -----------------------
// part of YACY // part of YACY
// (C) by Michael Peter Christen; mc@anomic.de // (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de // first published on http://www.anomic.de
// Frankfurt, Germany, 2004 // Frankfurt, Germany, 2004
// //
// last major change: $LastChangedDate$ by $LastChangedBy$ // $LastChangedDate$
// Revision: $LastChangedRevision$ // $LastChangedRevision$
// $LastChangedBy$
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
@ -323,29 +324,30 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream) * @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream)
*/ */
public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException { public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException {
this.connectionProperties = conProp; this.connectionProperties = conProp;
try { try {
// remembering the starting time of the request // remembering the starting time of the request
final Date requestDate = new Date(); // remember the time... final Date requestDate = new Date(); // remember the time...
this.connectionProperties.put(httpHeader.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime())); this.connectionProperties.put(httpHeader.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction(); if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
switchboard.proxyLastAccess = System.currentTimeMillis(); switchboard.proxyLastAccess = System.currentTimeMillis();
// using a ByteCount OutputStream to count the sent bytes (needed for the logfile) // using a ByteCount OutputStream to count the sent bytes (needed for the logfile)
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpHeader.CONNECTION_PROP_REQUESTLINE).length() + 2); respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpHeader.CONNECTION_PROP_REQUESTLINE).length() + 2);
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
final String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/' String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
int pos=0; int pos=0;
int port=0; int port=0;
URL url = null; URL url = null;
try { try {
url = httpHeader.getRequestURL(conProp); url = httpHeader.getRequestURL(conProp);
//redirector //redirector
if (redirectorEnabled){ if (redirectorEnabled){
synchronized(redirectorProcess){ synchronized(redirectorProcess){
@ -377,7 +379,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
port = Integer.parseInt(host.substring(pos + 1)); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
String ext; String ext;
if ((pos = path.lastIndexOf('.')) < 0) { if ((pos = path.lastIndexOf('.')) < 0) {
ext = ""; ext = "";
@ -389,16 +391,17 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// blacklist idea inspired by [AS]: // blacklist idea inspired by [AS]:
// respond with a 403 for all AGIS ("all you get is shit") servers // respond with a 403 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase(); String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) { if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
httpd.sendRespondError(conProp,respond,4,403,null, httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
return; return;
} }
// handle outgoing cookies // handle outgoing cookies
handleOutgoingCookies(requestHeader, host, ip); handleOutgoingCookies(requestHeader, host, ip);
// set another userAgent, if not yellowlisted // set another userAgent, if not yellowlisted
if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) { if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) {
// change the User-Agent // change the User-Agent
@ -441,7 +444,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
requestDate, // init date requestDate, // init date
0, // crawling depth 0, // crawling depth
url, // url url, // url
"", // name of the url is unknown "", // name of the url is unknown
requestHeader, // request headers requestHeader, // request headers
"200 OK", // request status "200 OK", // request status
cachedResponseHeader, // response headers cachedResponseHeader, // response headers
@ -646,9 +649,9 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// ok, we don't write actually into a file, only to RAM, and schedule writing the file. // ok, we don't write actually into a file, only to RAM, and schedule writing the file.
byte[] cacheArray = res.writeContent(hfos); byte[] cacheArray = res.writeContent(hfos);
this.theLogger.logFine("writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length))); this.theLogger.logFine("writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
// totally fresh file // totally fresh file
//cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert //cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
@ -667,7 +670,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
cacheEntry.cacheArray = cacheArray; cacheEntry.cacheArray = cacheArray;
cacheManager.push(cacheEntry); // necessary update, write response header to cache cacheManager.push(cacheEntry); // necessary update, write response header to cache
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS"); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
} }
} else { } else {
// the file is too big to cache it in the ram, or the size is unknown // the file is too big to cache it in the ram, or the size is unknown
// write to file right here. // write to file right here.
@ -701,7 +704,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
" StoreError=" + ((storeError==null)?"None":storeError) + " StoreError=" + ((storeError==null)?"None":storeError) +
" StoreHTCache=" + storeHTCache + " StoreHTCache=" + storeHTCache +
" SupportetContent=" + isSupportedContent); " SupportetContent=" + isSupportedContent);
res.writeContent(hfos, null); res.writeContent(hfos, null);
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize(); if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
if (sizeBeforeDelete == -1) { if (sizeBeforeDelete == -1) {
@ -715,7 +718,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
} }
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS"); conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
} }
if (gzippedOut != null) { if (gzippedOut != null) {
gzippedOut.finish(); gzippedOut.finish();
} }
@ -908,7 +911,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// check the blacklist, inspired by [AS]: respond with a 403 for all AGIS (all you get is shit) servers // check the blacklist, inspired by [AS]: respond with a 403 for all AGIS (all you get is shit) servers
String hostlow = host.toLowerCase(); String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
// re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args);
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, remotePath)) {
httpd.sendRespondError(conProp,respond,4,403,null, httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -927,9 +934,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// resolve yacy and yacyh domains // resolve yacy and yacyh domains
String yAddress = yacyCore.seedDB.resolveYacyAddress(host); String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
// re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args);
// attach possible yacy-sublevel-domain // attach possible yacy-sublevel-domain
if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath; if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath;
@ -987,7 +991,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
int port, pos; int port, pos;
if ((pos = host.indexOf(":")) < 0) { if ((pos = host.indexOf(":")) < 0) {
port = 80; port = 80;
@ -1101,46 +1105,49 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException { public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
this.connectionProperties = conProp; this.connectionProperties = conProp;
switchboard.proxyLastAccess = System.currentTimeMillis(); switchboard.proxyLastAccess = System.currentTimeMillis();
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER); String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS);
if (args != null) { path = path + "?" + args; }
int port, pos; int port, pos;
if ((pos = host.indexOf(":")) < 0) { if ((pos = host.indexOf(":")) < 0) {
port = 80; port = 80;
} else { } else {
port = Integer.parseInt(host.substring(pos + 1)); port = Integer.parseInt(host.substring(pos + 1));
host = host.substring(0, pos); host = host.substring(0, pos);
} }
// check the blacklist // check the blacklist
// blacklist idea inspired by [AS]: // blacklist idea inspired by [AS]:
// respond with a 403 for all AGIS ("all you get is shit") servers // respond with a 403 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, "/")) { if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
httpd.sendRespondError(conProp,clientOut,4,403,null, httpd.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
forceConnectionClose(); forceConnectionClose();
return; return;
} }
// possibly branch into PROXY-PROXY connection // possibly branch into PROXY-PROXY connection
if ( if (
(switchboard.remoteProxyConfig != null) && (switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig.useProxy()) && (switchboard.remoteProxyConfig.useProxy()) &&
(switchboard.remoteProxyConfig.useProxy4SSL()) (switchboard.remoteProxyConfig.useProxy4SSL())
) { ) {
httpc remoteProxy = null; httpc remoteProxy = null;
try { try {
remoteProxy = httpc.getInstance( remoteProxy = httpc.getInstance(
host, host,
port, port,
timeout, timeout,
false, false,
switchboard.remoteProxyConfig switchboard.remoteProxyConfig
); );
httpc.response response = remoteProxy.CONNECT(host, port, requestHeader); httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
response.print(); response.print();
if (response.success()) { if (response.success()) {
@ -1160,8 +1167,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
} finally { } finally {
if (remoteProxy != null) httpc.returnInstance(remoteProxy); if (remoteProxy != null) httpc.returnInstance(remoteProxy);
} }
} }
// try to establish connection to remote host // try to establish connection to remote host
Socket sslSocket = new Socket(host, port); Socket sslSocket = new Socket(host, port);
sslSocket.setSoTimeout(timeout); // waiting time for write sslSocket.setSoTimeout(timeout); // waiting time for write