mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
adapted for isListed()
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1942 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
8b14a0c833
commit
77f3237de3
|
@ -1,12 +1,13 @@
|
||||||
// httpdProxyHandler.java
|
// httpdProxyHandler.java
|
||||||
// -----------------------
|
// -----------------------
|
||||||
// part of YACY
|
// part of YACY
|
||||||
// (C) by Michael Peter Christen; mc@anomic.de
|
// (C) by Michael Peter Christen; mc@anomic.de
|
||||||
// first published on http://www.anomic.de
|
// first published on http://www.anomic.de
|
||||||
// Frankfurt, Germany, 2004
|
// Frankfurt, Germany, 2004
|
||||||
//
|
//
|
||||||
// last major change: $LastChangedDate$ by $LastChangedBy$
|
// $LastChangedDate$
|
||||||
// Revision: $LastChangedRevision$
|
// $LastChangedRevision$
|
||||||
|
// $LastChangedBy$
|
||||||
//
|
//
|
||||||
// This program is free software; you can redistribute it and/or modify
|
// This program is free software; you can redistribute it and/or modify
|
||||||
// it under the terms of the GNU General Public License as published by
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
@ -323,29 +324,30 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream)
|
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, de.anomic.http.httpHeader, java.io.OutputStream)
|
||||||
*/
|
*/
|
||||||
public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException {
|
public void doGet(Properties conProp, httpHeader requestHeader, OutputStream respond) throws IOException {
|
||||||
|
|
||||||
this.connectionProperties = conProp;
|
this.connectionProperties = conProp;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// remembering the starting time of the request
|
// remembering the starting time of the request
|
||||||
final Date requestDate = new Date(); // remember the time...
|
final Date requestDate = new Date(); // remember the time...
|
||||||
this.connectionProperties.put(httpHeader.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
|
this.connectionProperties.put(httpHeader.CONNECTION_PROP_REQUEST_START,new Long(requestDate.getTime()));
|
||||||
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
|
if (yacyTrigger) de.anomic.yacy.yacyCore.triggerOnlineAction();
|
||||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||||
|
|
||||||
// using a ByteCount OutputStream to count the sent bytes (needed for the logfile)
|
// using a ByteCount OutputStream to count the sent bytes (needed for the logfile)
|
||||||
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpHeader.CONNECTION_PROP_REQUESTLINE).length() + 2);
|
respond = new httpdByteCountOutputStream(respond,conProp.getProperty(httpHeader.CONNECTION_PROP_REQUESTLINE).length() + 2);
|
||||||
|
|
||||||
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
|
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
|
||||||
final String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
|
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH); // always starts with leading '/'
|
||||||
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
|
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
|
||||||
final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
|
final String ip = conProp.getProperty(httpHeader.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
|
||||||
int pos=0;
|
int pos=0;
|
||||||
int port=0;
|
int port=0;
|
||||||
|
|
||||||
URL url = null;
|
URL url = null;
|
||||||
try {
|
try {
|
||||||
url = httpHeader.getRequestURL(conProp);
|
url = httpHeader.getRequestURL(conProp);
|
||||||
|
|
||||||
//redirector
|
//redirector
|
||||||
if (redirectorEnabled){
|
if (redirectorEnabled){
|
||||||
synchronized(redirectorProcess){
|
synchronized(redirectorProcess){
|
||||||
|
@ -377,7 +379,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
port = Integer.parseInt(host.substring(pos + 1));
|
port = Integer.parseInt(host.substring(pos + 1));
|
||||||
host = host.substring(0, pos);
|
host = host.substring(0, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
String ext;
|
String ext;
|
||||||
if ((pos = path.lastIndexOf('.')) < 0) {
|
if ((pos = path.lastIndexOf('.')) < 0) {
|
||||||
ext = "";
|
ext = "";
|
||||||
|
@ -389,16 +391,17 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
// blacklist idea inspired by [AS]:
|
// blacklist idea inspired by [AS]:
|
||||||
// respond with a 403 for all AGIS ("all you get is shit") servers
|
// respond with a 403 for all AGIS ("all you get is shit") servers
|
||||||
String hostlow = host.toLowerCase();
|
String hostlow = host.toLowerCase();
|
||||||
|
if (args != null) { path = path + "?" + args; }
|
||||||
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
|
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
|
||||||
httpd.sendRespondError(conProp,respond,4,403,null,
|
httpd.sendRespondError(conProp,respond,4,403,null,
|
||||||
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
||||||
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle outgoing cookies
|
// handle outgoing cookies
|
||||||
handleOutgoingCookies(requestHeader, host, ip);
|
handleOutgoingCookies(requestHeader, host, ip);
|
||||||
|
|
||||||
// set another userAgent, if not yellowlisted
|
// set another userAgent, if not yellowlisted
|
||||||
if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) {
|
if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) {
|
||||||
// change the User-Agent
|
// change the User-Agent
|
||||||
|
@ -441,7 +444,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
requestDate, // init date
|
requestDate, // init date
|
||||||
0, // crawling depth
|
0, // crawling depth
|
||||||
url, // url
|
url, // url
|
||||||
"", // name of the url is unknown
|
"", // name of the url is unknown
|
||||||
requestHeader, // request headers
|
requestHeader, // request headers
|
||||||
"200 OK", // request status
|
"200 OK", // request status
|
||||||
cachedResponseHeader, // response headers
|
cachedResponseHeader, // response headers
|
||||||
|
@ -646,9 +649,9 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
// ok, we don't actually write into a file here, only to RAM, and schedule writing the file.
|
// ok, we don't actually write into a file here, only to RAM, and schedule writing the file.
|
||||||
byte[] cacheArray = res.writeContent(hfos);
|
byte[] cacheArray = res.writeContent(hfos);
|
||||||
this.theLogger.logFine("writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
|
this.theLogger.logFine("writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
|
||||||
|
|
||||||
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
|
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
|
||||||
|
|
||||||
if (sizeBeforeDelete == -1) {
|
if (sizeBeforeDelete == -1) {
|
||||||
// totally fresh file
|
// totally fresh file
|
||||||
//cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
|
//cacheEntry.status = plasmaHTCache.CACHE_FILL; // it's an insert
|
||||||
|
@ -667,7 +670,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
cacheEntry.cacheArray = cacheArray;
|
cacheEntry.cacheArray = cacheArray;
|
||||||
cacheManager.push(cacheEntry); // necessary update, write response header to cache
|
cacheManager.push(cacheEntry); // necessary update, write response header to cache
|
||||||
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
|
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_MISS");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// the file is too big to cache it in the ram, or the size is unknown
|
// the file is too big to cache it in the ram, or the size is unknown
|
||||||
// write to file right here.
|
// write to file right here.
|
||||||
|
@ -701,7 +704,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
" StoreError=" + ((storeError==null)?"None":storeError) +
|
" StoreError=" + ((storeError==null)?"None":storeError) +
|
||||||
" StoreHTCache=" + storeHTCache +
|
" StoreHTCache=" + storeHTCache +
|
||||||
" SupportetContent=" + isSupportedContent);
|
" SupportetContent=" + isSupportedContent);
|
||||||
|
|
||||||
res.writeContent(hfos, null);
|
res.writeContent(hfos, null);
|
||||||
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
|
if (hfos instanceof htmlFilterOutputStream) ((htmlFilterOutputStream) hfos).finalize();
|
||||||
if (sizeBeforeDelete == -1) {
|
if (sizeBeforeDelete == -1) {
|
||||||
|
@ -715,7 +718,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
}
|
}
|
||||||
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
|
conProp.setProperty(httpHeader.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gzippedOut != null) {
|
if (gzippedOut != null) {
|
||||||
gzippedOut.finish();
|
gzippedOut.finish();
|
||||||
}
|
}
|
||||||
|
@ -908,7 +911,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
|
|
||||||
// check the blacklist, inspired by [AS]: respond with a 403 for all AGIS (all you get is shit) servers
|
// check the blacklist, inspired by [AS]: respond with a 403 for all AGIS (all you get is shit) servers
|
||||||
String hostlow = host.toLowerCase();
|
String hostlow = host.toLowerCase();
|
||||||
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
|
|
||||||
|
// re-calc the url path
|
||||||
|
String remotePath = (args == null) ? path : (path + "?" + args);
|
||||||
|
|
||||||
|
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, remotePath)) {
|
||||||
httpd.sendRespondError(conProp,respond,4,403,null,
|
httpd.sendRespondError(conProp,respond,4,403,null,
|
||||||
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
||||||
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
||||||
|
@ -927,9 +934,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
// resolve yacy and yacyh domains
|
// resolve yacy and yacyh domains
|
||||||
String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
|
String yAddress = yacyCore.seedDB.resolveYacyAddress(host);
|
||||||
|
|
||||||
// re-calc the url path
|
|
||||||
String remotePath = (args == null) ? path : (path + "?" + args);
|
|
||||||
|
|
||||||
// attach possible yacy-sublevel-domain
|
// attach possible yacy-sublevel-domain
|
||||||
if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath;
|
if ((yAddress != null) && ((pos = yAddress.indexOf("/")) >= 0)) remotePath = yAddress.substring(pos) + remotePath;
|
||||||
|
|
||||||
|
@ -987,7 +991,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
|
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
|
||||||
String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
|
String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS); // may be null if no args were given
|
||||||
String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
|
String httpVer = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
|
||||||
|
|
||||||
int port, pos;
|
int port, pos;
|
||||||
if ((pos = host.indexOf(":")) < 0) {
|
if ((pos = host.indexOf(":")) < 0) {
|
||||||
port = 80;
|
port = 80;
|
||||||
|
@ -1101,46 +1105,49 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
|
public void doConnect(Properties conProp, de.anomic.http.httpHeader requestHeader, InputStream clientIn, OutputStream clientOut) throws IOException {
|
||||||
this.connectionProperties = conProp;
|
this.connectionProperties = conProp;
|
||||||
switchboard.proxyLastAccess = System.currentTimeMillis();
|
switchboard.proxyLastAccess = System.currentTimeMillis();
|
||||||
|
|
||||||
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
|
String host = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
|
||||||
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
|
String httpVersion = conProp.getProperty(httpHeader.CONNECTION_PROP_HTTP_VER);
|
||||||
|
String path = conProp.getProperty(httpHeader.CONNECTION_PROP_PATH);
|
||||||
|
final String args = conProp.getProperty(httpHeader.CONNECTION_PROP_ARGS);
|
||||||
|
if (args != null) { path = path + "?" + args; }
|
||||||
|
|
||||||
int port, pos;
|
int port, pos;
|
||||||
if ((pos = host.indexOf(":")) < 0) {
|
if ((pos = host.indexOf(":")) < 0) {
|
||||||
port = 80;
|
port = 80;
|
||||||
} else {
|
} else {
|
||||||
port = Integer.parseInt(host.substring(pos + 1));
|
port = Integer.parseInt(host.substring(pos + 1));
|
||||||
host = host.substring(0, pos);
|
host = host.substring(0, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check the blacklist
|
// check the blacklist
|
||||||
// blacklist idea inspired by [AS]:
|
// blacklist idea inspired by [AS]:
|
||||||
// respond with a 403 for all AGIS ("all you get is shit") servers
|
// respond with a 403 for all AGIS ("all you get is shit") servers
|
||||||
String hostlow = host.toLowerCase();
|
final String hostlow = host.toLowerCase();
|
||||||
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, "/")) {
|
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
|
||||||
httpd.sendRespondError(conProp,clientOut,4,403,null,
|
httpd.sendRespondError(conProp,clientOut,4,403,null,
|
||||||
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
||||||
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
|
||||||
forceConnectionClose();
|
forceConnectionClose();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// possibly branch into PROXY-PROXY connection
|
// possibly branch into PROXY-PROXY connection
|
||||||
if (
|
if (
|
||||||
(switchboard.remoteProxyConfig != null) &&
|
(switchboard.remoteProxyConfig != null) &&
|
||||||
(switchboard.remoteProxyConfig.useProxy()) &&
|
(switchboard.remoteProxyConfig.useProxy()) &&
|
||||||
(switchboard.remoteProxyConfig.useProxy4SSL())
|
(switchboard.remoteProxyConfig.useProxy4SSL())
|
||||||
) {
|
) {
|
||||||
httpc remoteProxy = null;
|
httpc remoteProxy = null;
|
||||||
try {
|
try {
|
||||||
remoteProxy = httpc.getInstance(
|
remoteProxy = httpc.getInstance(
|
||||||
host,
|
host,
|
||||||
port,
|
port,
|
||||||
timeout,
|
timeout,
|
||||||
false,
|
false,
|
||||||
switchboard.remoteProxyConfig
|
switchboard.remoteProxyConfig
|
||||||
);
|
);
|
||||||
|
|
||||||
httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
|
httpc.response response = remoteProxy.CONNECT(host, port, requestHeader);
|
||||||
response.print();
|
response.print();
|
||||||
if (response.success()) {
|
if (response.success()) {
|
||||||
|
@ -1160,8 +1167,8 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
} finally {
|
} finally {
|
||||||
if (remoteProxy != null) httpc.returnInstance(remoteProxy);
|
if (remoteProxy != null) httpc.returnInstance(remoteProxy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to establish connection to remote host
|
// try to establish connection to remote host
|
||||||
Socket sslSocket = new Socket(host, port);
|
Socket sslSocket = new Socket(host, port);
|
||||||
sslSocket.setSoTimeout(timeout); // waiting time for write
|
sslSocket.setSoTimeout(timeout); // waiting time for write
|
||||||
|
|
Loading…
Reference in New Issue
Block a user