mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
788288eb9e
The default schema uses only some of them and the resulting search index now has the following properties: - the webgraph will have about 40 times as many entries as the default index - the complete index size will increase and may be about double the current size. As testing showed, not much indexing performance is lost. The default index will be smaller (fields were moved out of it); thus searching can be faster. The new index means that some old parts of YaCy can be removed, i.e. the specialized webgraph data and the noload crawler. The new index will make it possible to: - search within link texts of linked but not indexed documents (about 20 times the size of the document index!) - get a very detailed link graph - enhance ranking using a complete link graph. To get full access to the new index, the Solr API now has two access points: one with attribute core=collection1 for the default search index and one with core=webgraph for the new webgraph search index. This is also available for p2p operation, but client access is not yet implemented.
1682 lines
77 KiB
Java
1682 lines
77 KiB
Java
// HTTPDProxyHandler.java
|
|
// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
|
// first published 2004 on http://yacy.net
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// LICENSE
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
// Contributions:
|
|
// [AS] Alexander Schier: Blacklist (404 response for AGIS hosts)
|
|
// [TL] Timo Leise: url-wildcards for blacklists
|
|
|
|
/*
|
|
Class documentation:
|
|
This class is a servlet to the httpd daemon. It is accessed each time
|
|
an URL in a GET, HEAD or POST command contains the whole host information
|
|
or a host is given in the header host field of an HTTP/1.0 / HTTP/1.1
|
|
command.
|
|
Transparency is maintained, whenever appropriate. We change header
|
|
attributes if necessary for the indexing mechanism; i.e. we do not
|
|
support gzip-ed encoding. We also do not support unrealistic
|
|
'expires' values that would force a cache to be flushed immediately
|
|
pragma non-cache attributes are supported
|
|
*/
|
|
|
|
|
|
package net.yacy.server.http;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.InputStreamReader;
|
|
import java.io.OutputStream;
|
|
import java.io.PrintWriter;
|
|
import java.net.BindException;
|
|
import java.net.ConnectException;
|
|
import java.net.InetAddress;
|
|
import java.net.MalformedURLException;
|
|
import java.net.NoRouteToHostException;
|
|
import java.net.Socket;
|
|
import java.net.SocketException;
|
|
import java.net.SocketTimeoutException;
|
|
import java.net.UnknownHostException;
|
|
import java.util.Arrays;
|
|
import java.util.Date;
|
|
import java.util.HashMap;
|
|
import java.util.HashSet;
|
|
import java.util.Iterator;
|
|
import java.util.logging.FileHandler;
|
|
import java.util.logging.Level;
|
|
import java.util.logging.LogManager;
|
|
import java.util.logging.Logger;
|
|
|
|
import net.yacy.cora.document.UTF8;
|
|
import net.yacy.cora.protocol.ClientIdentification;
|
|
import net.yacy.cora.protocol.Domains;
|
|
import net.yacy.cora.protocol.HeaderFramework;
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
import net.yacy.cora.protocol.ResponseHeader;
|
|
import net.yacy.cora.protocol.http.HTTPClient;
|
|
import net.yacy.cora.protocol.http.ProxySettings;
|
|
import net.yacy.cora.protocol.http.ProxySettings.Protocol;
|
|
import net.yacy.crawler.data.Cache;
|
|
import net.yacy.crawler.retrieval.Request;
|
|
import net.yacy.crawler.retrieval.Response;
|
|
import net.yacy.document.TextParser;
|
|
import net.yacy.document.parser.html.ContentTransformer;
|
|
import net.yacy.document.parser.html.Transformer;
|
|
import net.yacy.kelondro.data.meta.DigestURI;
|
|
import net.yacy.kelondro.io.ByteCountOutputStream;
|
|
import net.yacy.kelondro.logging.Log;
|
|
import net.yacy.kelondro.util.FileUtils;
|
|
import net.yacy.repository.Blacklist.BlacklistType;
|
|
import net.yacy.search.Switchboard;
|
|
import net.yacy.search.SwitchboardConstants;
|
|
import net.yacy.server.serverCore;
|
|
import net.yacy.server.serverObjects;
|
|
|
|
public final class HTTPDProxyHandler {
|
|
|
|
|
|
private static final String yacyProxyUserAgent = "yacyproxy (" + ClientIdentification.yacySystem +") http://yacy.net/bot.html";
|
|
|
|
// static variables
|
|
// can only be instantiated upon first instantiation of this class object
|
|
private static Switchboard sb = null;
|
|
private static final HashSet<String> yellowList;
|
|
private static int timeout = 60000;
|
|
private static boolean yacyTrigger = true;
|
|
public static boolean isTransparentProxy = false;
|
|
private static Process redirectorProcess = null;
|
|
private static boolean redirectorEnabled = false;
|
|
private static PrintWriter redirectorWriter = null;
|
|
private static BufferedReader redirectorReader = null;
|
|
|
|
private static Transformer transformer = null;
|
|
private static File htRootPath = null;
|
|
|
|
//private Properties connectionProperties = null;
|
|
// creating a logger
|
|
private static final Log log = new Log("PROXY");
|
|
|
|
private static boolean doAccessLogging = false;
|
|
/**
 * One-time class setup.
 * <p>
 * When a {@link Switchboard} is available this reads the proxy configuration
 * (transparent mode, client timeout, htroot path), configures the optional
 * proxy access log file, loads the content transformer and the yellow-list and
 * starts the external redirector process if one is configured. Without a
 * switchboard only {@code yellowList} is set (to {@code null}).
 * <p>
 * Malformed numeric configuration values are reported and replaced by their
 * defaults; they must never escape from here because an exception thrown by a
 * static initializer makes the whole class unusable.
 */
static {

    // get a switchboard
    sb = Switchboard.getSwitchboard();
    if (sb != null) {

        isTransparentProxy = Boolean.parseBoolean(sb.getConfig("isTransparentProxy","false"));

        // set timeout
        final String timeoutStr = sb.getConfig("proxy.clientTimeout", "10000");
        try {
            timeout = Integer.parseInt(timeoutStr);
        } catch (final NumberFormatException e) {
            // fall back to the configuration default instead of failing class initialization
            timeout = 10000;
            log.logWarning("Invalid value '" + timeoutStr + "' for proxy.clientTimeout, using " + timeout + " ms");
        }

        // create a htRootPath: system pages
        htRootPath = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot"));
        if (!(htRootPath.exists())) {
            if (!htRootPath.mkdir()) {
                Log.logSevere("PROXY", "could not create htRoot "+ htRootPath);
            }
        }

        // do logger initialization
        try {
            log.logInfo("Configuring proxy access logging ...");

            // getting the logging manager
            final LogManager manager = LogManager.getLogManager();
            final String className = HTTPDProxyHandler.class.getName();

            // determining if proxy access logging is enabled
            final String enabled = manager.getProperty(className + ".logging.enabled");
            if ("true".equalsIgnoreCase(enabled)) {

                // reading out some needed configuration properties
                int limit = 1024*1024, count = 20;
                String pattern = manager.getProperty(className + ".logging.FileHandler.pattern");
                if (pattern == null) pattern = "DATA/LOG/proxyAccess%u%g.log";
                // make pattern absolute
                if (!new File(pattern).isAbsolute()) pattern = new File(sb.getDataPath(), pattern).getAbsolutePath();

                final String limitStr = manager.getProperty(className + ".logging.FileHandler.limit");
                if (limitStr != null) {
                    try {
                        limit = Integer.parseInt(limitStr);
                    } catch (final NumberFormatException e) {
                        log.logWarning("Invalid proxy access log limit '" + limitStr + "', using " + limit);
                    }
                }

                final String countStr = manager.getProperty(className + ".logging.FileHandler.count");
                if (countStr != null) {
                    try {
                        count = Integer.parseInt(countStr);
                    } catch (final NumberFormatException e) {
                        log.logWarning("Invalid proxy access log count '" + countStr + "', using " + count);
                    }
                }

                // creating the proxy access logger
                final Logger proxyLogger = Logger.getLogger("PROXY.access");
                proxyLogger.setUseParentHandlers(false);
                proxyLogger.setLevel(Level.FINEST);

                final FileHandler txtLog = new FileHandler(pattern, limit, count, true);
                txtLog.setFormatter(new ProxyLogFormatter());
                txtLog.setLevel(Level.FINEST);
                proxyLogger.addHandler(txtLog);

                doAccessLogging = true;
                // report the values actually in effect, not the raw (possibly missing) properties
                log.logInfo("Proxy access logging configuration done." +
                        "\n\tFilename: " + pattern +
                        "\n\tLimit: " + limit +
                        "\n\tCount: " + count);
            } else {
                log.logInfo("Proxy access logging is deactivated.");
            }
        } catch (final Exception e) {
            log.logSevere("Unable to configure proxy access logging.",e);
        }

        // load a transformer
        transformer = new ContentTransformer();
        transformer.init(new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.LIST_BLUE, "")).toString());

        // load the yellow-list
        final String f = sb.getConfig("proxyYellowList", null);
        if (f != null) {
            yellowList = FileUtils.loadList(new File(f));
            log.logConfig("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries");
        } else {
            yellowList = new HashSet<String>();
        }

        // start the external redirector, if one is configured
        final String redirectorPath = sb.getConfig("externalRedirector", "");
        if (redirectorPath.length() > 0 && !redirectorEnabled) {
            try {
                redirectorProcess=Runtime.getRuntime().exec(redirectorPath);
                redirectorWriter = new PrintWriter(redirectorProcess.getOutputStream());
                redirectorReader = new BufferedReader(new InputStreamReader(redirectorProcess.getInputStream()));
                redirectorEnabled=true;
            } catch (final IOException e) {
                // the proxy keeps working without a redirector; make the failure visible in the log
                log.logWarning("redirector not found: " + redirectorPath, e);
            }
        }
    } else {
        yellowList = null;
    }
}
|
|
|
|
/**
|
|
* Special logger instance for proxy access logging much similar
|
|
* to the squid access.log file
|
|
*/
|
|
private static final Log proxyLog = new Log("PROXY.access");
|
|
|
|
/**
|
|
* Reusable {@link StringBuilder} for logging
|
|
*/
|
|
private static final StringBuilder logMessage = new StringBuilder();
|
|
|
|
/**
|
|
* Reusable {@link StringBuilder} to generate the useragent string
|
|
*/
|
|
private static final StringBuilder userAgentStr = new StringBuilder();
|
|
|
|
private static void handleOutgoingCookies(final RequestHeader requestHeader, final String targethost, final String clienthost) {
|
|
/*
|
|
The syntax for the header is:
|
|
|
|
cookie = "Cookie:" cookie-version
|
|
1*((";" | ",") cookie-value)
|
|
cookie-value = NAME "=" VALUE [";" path] [";" domain]
|
|
cookie-version = "$Version" "=" value
|
|
NAME = attr
|
|
VALUE = value
|
|
path = "$Path" "=" value
|
|
domain = "$Domain" "=" value
|
|
*/
|
|
if (sb.getConfigBool("proxy.monitorCookies", false)) {
|
|
if (requestHeader.containsKey(RequestHeader.COOKIE)) {
|
|
final Object[] entry = new Object[]{new Date(), clienthost, requestHeader.getMultiple(RequestHeader.COOKIE)};
|
|
synchronized(sb.outgoingCookies) {
|
|
sb.outgoingCookies.put(targethost, entry);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private static void handleIncomingCookies(final ResponseHeader respondHeader, final String serverhost, final String targetclient) {
|
|
/*
|
|
The syntax for the Set-Cookie response header is
|
|
|
|
set-cookie = "Set-Cookie:" cookies
|
|
cookies = 1#cookie
|
|
cookie = NAME "=" VALUE *(";" cookie-av)
|
|
NAME = attr
|
|
VALUE = value
|
|
cookie-av = "Comment" "=" value
|
|
| "Domain" "=" value
|
|
| "Max-Age" "=" value
|
|
| "Path" "=" value
|
|
| "Secure"
|
|
| "Version" "=" 1*DIGIT
|
|
*/
|
|
if (sb.getConfigBool("proxy.monitorCookies", false)) {
|
|
if (respondHeader.containsKey(HeaderFramework.SET_COOKIE)) {
|
|
final Object[] entry = new Object[]{new Date(), targetclient, respondHeader.getMultiple(HeaderFramework.SET_COOKIE)};
|
|
synchronized(sb.incomingCookies) {
|
|
sb.incomingCookies.put(serverhost, entry);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @param conProp a collection of properties about the connection, like URL
|
|
* @param requestHeader The header lines of the connection from the request
|
|
* @param respond the OutputStream to the client
|
|
* @see de.anomic.http.httpdHandler#doGet(java.util.Properties, net.yacy.cora.protocol.HeaderFramework, java.io.OutputStream)
|
|
*/
|
|
public static void doGet(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream respond) {
|
|
ByteCountOutputStream countedRespond = null;
|
|
try {
|
|
final int reqID = requestHeader.hashCode();
|
|
// remembering the starting time of the request
|
|
final Date requestDate = new Date(); // remember the time...
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime()));
|
|
if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction();
|
|
sb.proxyLastAccess = System.currentTimeMillis();
|
|
|
|
// using an ByteCount OutputStream to count the send bytes (needed for the logfile)
|
|
countedRespond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY");
|
|
|
|
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); // always starts with leading '/'
|
|
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given
|
|
final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
|
|
int pos=0;
|
|
int port=0;
|
|
|
|
DigestURI url = null;
|
|
try {
|
|
url = HeaderFramework.getRequestURL(conProp);
|
|
if (log.isFine()) log.logFine(reqID +" GET "+ url);
|
|
if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader);
|
|
|
|
//redirector
|
|
if (redirectorEnabled){
|
|
synchronized(redirectorProcess){
|
|
redirectorWriter.println(url.toNormalform(true));
|
|
redirectorWriter.flush();
|
|
}
|
|
final String newUrl = redirectorReader.readLine();
|
|
if (!newUrl.equals("")) {
|
|
try {
|
|
url = new DigestURI(newUrl);
|
|
} catch(final MalformedURLException e){}//just keep the old one
|
|
}
|
|
if (log.isFinest()) log.logFinest(reqID +" using redirector to "+ url);
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort());
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath());
|
|
requestHeader.put(HeaderFramework.HOST, url.getHost()+":"+url.getPort());
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath());
|
|
}
|
|
} catch (final MalformedURLException e) {
|
|
final String errorMsg = "ERROR: internal error with url generation: host=" +
|
|
host + ", port=" + port + ", path=" + path + ", args=" + args;
|
|
log.logSevere(errorMsg);
|
|
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e);
|
|
return;
|
|
}
|
|
|
|
if ((pos = host.indexOf(':')) < 0) {
|
|
port = 80;
|
|
} else {
|
|
port = Integer.parseInt(host.substring(pos + 1));
|
|
host = host.substring(0, pos);
|
|
}
|
|
|
|
// check the blacklist
|
|
// blacklist idea inspired by [AS]:
|
|
// respond a 404 for all AGIS ("all you get is shit") servers
|
|
final String hostlow = host.toLowerCase();
|
|
if (args != null) { path = path + "?" + args; }
|
|
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
|
|
log.logInfo("AGIS blocking of host '" + hostlow + "'");
|
|
HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null,
|
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
|
return;
|
|
}
|
|
|
|
// handle outgoing cookies
|
|
handleOutgoingCookies(requestHeader, host, ip);
|
|
prepareRequestHeader(conProp, requestHeader, hostlow);
|
|
final ResponseHeader cachedResponseHeader = Cache.getResponseHeader(url.hash());
|
|
|
|
// why are files unzipped upon arrival? why not zip all files in cache?
|
|
// This follows from the following premises
|
|
// (a) no file shall be unzip-ed more than once to prevent unnecessary computing time
|
|
// (b) old cache entries shall be comparable with refill-entries to detect/distinguish case 3+4
|
|
// (c) the indexing mechanism needs files unzip-ed, a schedule could do that later
|
|
// case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped
|
|
// and the newly arrival would be zipped and would have to be unzipped upon load. But then the
|
|
// scheduler is superfluous. Therefore the only reminding case is
|
|
// (d) cached files shall be either all zipped or unzipped
|
|
// case d contradicts with a, because files need to be unzipped for indexing. Therefore
|
|
// the only remaining case is to unzip files right upon load. Thats what we do here.
|
|
|
|
// finally use existing cache if appropriate
|
|
// here we must decide weather or not to save the data
|
|
// to a cache
|
|
// we distinguish four CACHE STATE cases:
|
|
// 1. cache fill
|
|
// 2. cache fresh - no refill
|
|
// 3. cache stale - refill - necessary
|
|
// 4. cache stale - refill - superfluous
|
|
// in two of these cases we trigger a scheduler to handle newly arrived files:
|
|
// case 1 and case 3
|
|
if (cachedResponseHeader == null) {
|
|
if (log.isFinest()) log.logFinest(reqID + " page not in cache: fulfill request from web");
|
|
fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond);
|
|
} else {
|
|
final Request request = new Request(
|
|
null,
|
|
url,
|
|
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
|
|
"",
|
|
cachedResponseHeader.lastModified(),
|
|
sb.crawler.defaultProxyProfile.handle(),
|
|
0,
|
|
0,
|
|
0,
|
|
0);
|
|
final Response response = new Response(
|
|
request,
|
|
requestHeader,
|
|
cachedResponseHeader,
|
|
sb.crawler.defaultProxyProfile,
|
|
true
|
|
);
|
|
final byte[] cacheContent = Cache.getContent(url.hash());
|
|
if (cacheContent != null && response.isFreshForProxy()) {
|
|
if (log.isFinest()) log.logFinest(reqID + " fulfill request from cache");
|
|
fulfillRequestFromCache(conProp, url, requestHeader, cachedResponseHeader, cacheContent, countedRespond);
|
|
} else {
|
|
if (log.isFinest()) log.logFinest(reqID + " fulfill request from web");
|
|
fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond);
|
|
}
|
|
}
|
|
|
|
|
|
} catch (final Exception e) {
|
|
try {
|
|
final String exTxt = e.getMessage();
|
|
if ((exTxt!=null)&&(exTxt.startsWith("Socket closed"))) {
|
|
forceConnectionClose(conProp);
|
|
} else if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
|
|
final String errorMsg = "Unexpected Error. " + e.getClass().getName() + ": " + e.getMessage();
|
|
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e);
|
|
log.logSevere(errorMsg);
|
|
} else {
|
|
forceConnectionClose(conProp);
|
|
}
|
|
} catch (final Exception ee) {
|
|
forceConnectionClose(conProp);
|
|
}
|
|
} finally {
|
|
try { if(countedRespond != null) countedRespond.flush(); else if(respond != null) respond.flush(); } catch (final Exception e) {}
|
|
if (countedRespond != null) countedRespond.finish();
|
|
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_END, Long.valueOf(System.currentTimeMillis()));
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE,(countedRespond != null) ? Long.toString(countedRespond.getCount()) : -1L);
|
|
logProxyAccess(conProp);
|
|
}
|
|
}
|
|
|
|
private static void fulfillRequestFromWeb(final HashMap<String, Object> conProp, final DigestURI url, final RequestHeader requestHeader, final ResponseHeader cachedResponseHeader, final OutputStream respond) {
|
|
try {
|
|
final boolean proxyAugmentation = sb.getConfigBool("proxyAugmentation", false);
|
|
final int reqID = requestHeader.hashCode();
|
|
|
|
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); // always starts with leading '/'
|
|
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given
|
|
final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
|
|
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // the ip from the connecting peer
|
|
|
|
int port, pos;
|
|
if ((pos = host.indexOf(':')) < 0) {
|
|
port = 80;
|
|
} else {
|
|
port = Integer.parseInt(host.substring(pos + 1));
|
|
host = host.substring(0, pos);
|
|
}
|
|
|
|
// point virtual directory to my peer
|
|
if (path.startsWith("/currentyacypeer/")) {
|
|
host = sb.peers.myIP();
|
|
port = sb.peers.myPort();
|
|
path = path.substring(16);
|
|
}
|
|
|
|
// resolve yacy and yacyh domains
|
|
String yAddress = resolveYacyDomains(host);
|
|
|
|
// re-calc the url path
|
|
final String remotePath = (args == null) ? path : (path + "?" + args); // with leading '/'
|
|
|
|
// remove yacy-subdomain-path, when accessing /env
|
|
if ( (yAddress != null)
|
|
&& (remotePath.startsWith("/env"))
|
|
&& ((pos = yAddress.indexOf('/')) != -1)
|
|
) yAddress = yAddress.substring(0, yAddress.indexOf('/'));
|
|
|
|
modifyProxyHeaders(requestHeader, httpVer);
|
|
|
|
final String connectHost = hostPart(host, port, yAddress);
|
|
final String getUrl = "http://"+ connectHost + remotePath;
|
|
|
|
requestHeader.remove(HeaderFramework.HOST);
|
|
|
|
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
|
|
|
|
// send request
|
|
try {
|
|
client.GET(getUrl);
|
|
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader);
|
|
|
|
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
|
|
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
|
|
// determine if it's an internal error of the httpc
|
|
if (responseHeader.isEmpty()) {
|
|
throw new Exception(client.getHttpResponse().getStatusLine().toString());
|
|
}
|
|
|
|
if (proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
|
|
// enable chunk encoding, because we don't know the length after annotating
|
|
responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
|
|
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
|
|
}
|
|
|
|
ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, statusCode, respond);
|
|
|
|
// the cache does either not exist or is (supposed to be) stale
|
|
long sizeBeforeDelete = -1;
|
|
if (cachedResponseHeader != null) {
|
|
// delete the cache
|
|
final ResponseHeader rh = Cache.getResponseHeader(url.hash());
|
|
if (rh != null && (sizeBeforeDelete = rh.getContentLength()) == 0) {
|
|
final byte[] b = Cache.getContent(url.hash());
|
|
if (b != null) sizeBeforeDelete = b.length;
|
|
}
|
|
Cache.delete(url.hash());
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
|
|
}
|
|
|
|
// reserver cache entry
|
|
final Request request = new Request(
|
|
null,
|
|
url,
|
|
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
|
|
"",
|
|
responseHeader.lastModified(),
|
|
sb.crawler.defaultProxyProfile.handle(),
|
|
0,
|
|
0,
|
|
0,
|
|
sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);
|
|
|
|
|
|
// handle incoming cookies
|
|
handleIncomingCookies(responseHeader, host, ip);
|
|
|
|
// prepareResponseHeader(responseHeader, res.getHttpVer());
|
|
prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());
|
|
|
|
if(proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
|
|
// chunked encoding disables somewhere, add it again
|
|
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
|
|
}
|
|
|
|
// sending the respond header back to the client
|
|
if (chunkedOut != null) {
|
|
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
|
|
}
|
|
|
|
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
|
|
HTTPDemon.sendRespondHeader(
|
|
conProp,
|
|
respond,
|
|
httpVer,
|
|
statusCode,
|
|
client.getHttpResponse().getStatusLine().toString(), // status text
|
|
responseHeader);
|
|
|
|
if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) {
|
|
|
|
OutputStream outStream = chunkedOut != null ? chunkedOut : respond;
|
|
final Response response = new Response(
|
|
request,
|
|
requestHeader,
|
|
responseHeader,
|
|
sb.crawler.defaultProxyProfile,
|
|
true
|
|
);
|
|
final String storeError = response.shallStoreCacheForProxy();
|
|
final boolean storeHTCache = response.profile().storeHTCache();
|
|
final String supportError = TextParser.supports(response.url(), response.getMimeType());
|
|
|
|
if(proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
|
|
outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, requestHeader);
|
|
}
|
|
if (
|
|
/*
|
|
* Now we store the response into the htcache directory if
|
|
* a) the response is cacheable AND
|
|
*/
|
|
(storeError == null) &&
|
|
/*
|
|
* b) the user has configured to use the htcache OR
|
|
* c) the content should be indexed
|
|
*/
|
|
((storeHTCache) || (supportError != null))
|
|
) {
|
|
// we don't write actually into a file, only to RAM, and schedule writing the file.
|
|
// int l = res.getResponseHeader().size();
|
|
final int l = responseHeader.size();
|
|
final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l);
|
|
|
|
final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {outStream, byteStream});
|
|
// FileUtils.copy(res.getDataAsStream(), toClientAndMemory);
|
|
client.writeTo(toClientAndMemory);
|
|
// cached bytes
|
|
byte[] cacheArray;
|
|
if (byteStream.size() > 0) {
|
|
cacheArray = byteStream.toByteArray();
|
|
} else {
|
|
cacheArray = null;
|
|
}
|
|
if (log.isFine()) log.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));
|
|
|
|
if (sizeBeforeDelete == -1) {
|
|
// totally fresh file
|
|
response.setContent(cacheArray);
|
|
try {
|
|
Cache.store(response.url(), response.getResponseHeader(), cacheArray);
|
|
sb.toIndexer(response);
|
|
} catch (final IOException e) {
|
|
log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
|
|
}
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
|
|
} else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) {
|
|
// before we came here we deleted a cache entry
|
|
cacheArray = null;
|
|
//cacheManager.push(cacheEntry); // unnecessary update
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REF_FAIL_HIT");
|
|
} else {
|
|
// before we came here we deleted a cache entry
|
|
response.setContent(cacheArray);
|
|
try {
|
|
Cache.store(response.url(), response.getResponseHeader(), cacheArray);
|
|
sb.toIndexer(response);
|
|
} catch (final IOException e) {
|
|
log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
|
|
}
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
|
|
}
|
|
} else {
|
|
// no caching
|
|
if (log.isFine()) log.logFine(reqID +" "+ url.toString() + " not cached." +
|
|
" StoreError=" + ((storeError==null)?"None":storeError) +
|
|
" StoreHTCache=" + storeHTCache +
|
|
" SupportError=" + supportError);
|
|
|
|
// FileUtils.copy(res.getDataAsStream(), outStream);
|
|
client.writeTo(outStream);
|
|
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
|
|
}
|
|
|
|
outStream.close();
|
|
|
|
if (chunkedOut != null) {
|
|
chunkedOut.finish();
|
|
chunkedOut.flush();
|
|
}
|
|
} // end hasBody
|
|
} catch(final SocketException se) {
|
|
// if opened ...
|
|
// if(res != null) {
|
|
// // client cut proxy connection, abort download
|
|
// res.abort();
|
|
// }
|
|
client.finish();
|
|
handleProxyException(se,conProp,respond,url);
|
|
} finally {
|
|
// if opened ...
|
|
// if(res != null) {
|
|
// // ... close connection
|
|
// res.closeStream();
|
|
// }
|
|
client.finish();
|
|
}
|
|
} catch (final Exception e) {
|
|
handleProxyException(e,conProp,respond,url);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* determines if the response should have a body
|
|
*
|
|
* @param statusCode
|
|
* @param responseHeader
|
|
* @return
|
|
*/
|
|
private static boolean hasBody(final int statusCode) {
|
|
// "All 1xx (informational), 204 (no content), and 304 (not modified) responses MUST NOT
|
|
// include a message-body."
|
|
// [RFC 2616 HTTP/1.1, Sect. 4.3] and like [RFC 1945 HTTP/1.0, Sect. 7.2]
|
|
if((statusCode >= 100 && statusCode < 200) || statusCode == 204 || statusCode == 304) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
private static void fulfillRequestFromCache(
|
|
final HashMap<String, Object> conProp,
|
|
final DigestURI url,
|
|
final RequestHeader requestHeader,
|
|
final ResponseHeader cachedResponseHeader,
|
|
final byte[] cacheEntry,
|
|
OutputStream respond
|
|
) throws IOException {
|
|
|
|
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
|
|
// we respond on the request by using the cache, the cache is fresh
|
|
try {
|
|
prepareResponseHeader(cachedResponseHeader, httpVer);
|
|
|
|
// replace date field in old header by actual date, this is according to RFC
|
|
cachedResponseHeader.put(HeaderFramework.DATE, HeaderFramework.formatRFC1123(new Date()));
|
|
|
|
// check if we can send a 304 instead the complete content
|
|
if (requestHeader.containsKey(RequestHeader.IF_MODIFIED_SINCE)) {
|
|
// conditional request: freshness of cache for that condition was already
|
|
// checked within shallUseCache(). Now send only a 304 response
|
|
log.logInfo("CACHE HIT/304 " + url.toString());
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_HIT");
|
|
|
|
// setting the content length header to 0
|
|
cachedResponseHeader.put(HeaderFramework.CONTENT_LENGTH, Integer.toString(0));
|
|
|
|
// send cached header with replaced date and added length
|
|
HTTPDemon.sendRespondHeader(conProp,respond,httpVer,304,cachedResponseHeader);
|
|
//respondHeader(respond, "304 OK", cachedResponseHeader); // respond with 'not modified'
|
|
} else {
|
|
// unconditional request: send content of cache
|
|
log.logInfo("CACHE HIT/203 " + url.toString());
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_HIT");
|
|
|
|
// setting the content header to the proper length
|
|
cachedResponseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(cacheEntry.length));
|
|
|
|
// send cached header with replaced date and added length
|
|
HTTPDemon.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader);
|
|
//respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative'
|
|
|
|
if(sb.getConfigBool("proxyAugmentation", false)
|
|
&& AugmentedHtmlStream.supportsMime(cachedResponseHeader.mime())) {
|
|
respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, requestHeader);
|
|
}
|
|
|
|
// send also the complete body now from the cache
|
|
// simply read the file and transfer to out socket
|
|
FileUtils.copy(cacheEntry, respond);
|
|
}
|
|
// that's it!
|
|
} catch (final Exception e) {
|
|
// this happens if the client stops loading the file
|
|
// we do nothing here
|
|
if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
|
|
log.logWarning("Error while trying to send cached message body.");
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PERSISTENT,"close");
|
|
} else {
|
|
HTTPDemon.sendRespondError(conProp,respond,4,503,"socket error: " + e.getMessage(),"socket error: " + e.getMessage(), e);
|
|
}
|
|
} finally {
|
|
try { respond.flush(); respond.close(); } catch (final Exception e) {}
|
|
}
|
|
return;
|
|
}
|
|
|
|
public static void doHead(final HashMap<String, Object> conProp, final RequestHeader requestHeader, OutputStream respond) {
|
|
|
|
// ResponseContainer res = null;
|
|
DigestURI url = null;
|
|
try {
|
|
final int reqID = requestHeader.hashCode();
|
|
// remembering the starting time of the request
|
|
final Date requestDate = new Date(); // remember the time...
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime()));
|
|
if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction();
|
|
sb.proxyLastAccess = System.currentTimeMillis();
|
|
|
|
// using an ByteCount OutputStream to count the send bytes
|
|
respond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY");
|
|
|
|
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
final String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH);
|
|
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS);
|
|
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
|
|
int port, pos;
|
|
if ((pos = host.indexOf(':')) < 0) {
|
|
port = 80;
|
|
} else {
|
|
port = Integer.parseInt(host.substring(pos + 1));
|
|
host = host.substring(0, pos);
|
|
}
|
|
|
|
try {
|
|
url = new DigestURI("http", host, port, (args == null) ? path : path + "?" + args);
|
|
} catch (final MalformedURLException e) {
|
|
final String errorMsg = "ERROR: internal error with url generation: host=" +
|
|
host + ", port=" + port + ", path=" + path + ", args=" + args;
|
|
log.logSevere(errorMsg);
|
|
HTTPDemon.sendRespondError(conProp,respond,4,501,null,errorMsg,e);
|
|
return;
|
|
}
|
|
if (log.isFine()) log.logFine(reqID +" HEAD "+ url);
|
|
if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader);
|
|
|
|
// check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers
|
|
final String hostlow = host.toLowerCase();
|
|
|
|
// re-calc the url path
|
|
final String remotePath = (args == null) ? path : (path + "?" + args);
|
|
|
|
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) {
|
|
HTTPDemon.sendRespondError(conProp,respond,4,403,null,
|
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
|
log.logInfo("AGIS blocking of host '" + hostlow + "'");
|
|
return;
|
|
}
|
|
|
|
prepareRequestHeader(conProp, requestHeader, hostlow);
|
|
|
|
// resolve yacy and yacyh domains
|
|
String yAddress = resolveYacyDomains(host);
|
|
|
|
// remove yacy-subdomain-path, when accessing /env
|
|
if ( (yAddress != null)
|
|
&& (remotePath.startsWith("/env"))
|
|
&& ((pos = yAddress.indexOf('/')) != -1)
|
|
) yAddress = yAddress.substring(0, yAddress.indexOf('/'));
|
|
|
|
modifyProxyHeaders(requestHeader, httpVer);
|
|
|
|
// generate request-url
|
|
final String connectHost = hostPart(host, port, yAddress);
|
|
final String getUrl = "http://"+ connectHost + remotePath;
|
|
if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl);
|
|
|
|
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
|
|
|
|
// send request
|
|
// try {
|
|
// res = client.HEAD(getUrl);
|
|
// if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine());
|
|
client.HEADResponse(getUrl);
|
|
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
|
|
|
|
// determine if it's an internal error of the httpc
|
|
// final ResponseHeader responseHeader = res.getResponseHeader();
|
|
// if (responseHeader.isEmpty()) {
|
|
// throw new Exception(res.getStatusLine());
|
|
// }
|
|
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
|
|
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
|
|
if (responseHeader.isEmpty()) {
|
|
throw new Exception(client.getHttpResponse().getStatusLine().toString());
|
|
}
|
|
|
|
// prepareResponseHeader(responseHeader, res.getHttpVer());
|
|
prepareResponseHeader(responseHeader, client.getHttpResponse().getStatusLine().getProtocolVersion().toString());
|
|
|
|
// sending the server respond back to the client
|
|
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
|
|
// HTTPDemon.sendRespondHeader(conProp,respond,httpVer,res.getStatusCode(),res.getStatusLine().substring(4),responseHeader);
|
|
HTTPDemon.sendRespondHeader(
|
|
conProp,
|
|
respond,
|
|
httpVer,
|
|
statusCode,
|
|
client.getHttpResponse().getStatusLine().toString(),
|
|
responseHeader);
|
|
respond.flush();
|
|
// } finally {
|
|
// if(res != null) {
|
|
// // ... close connection
|
|
// res.closeStream();
|
|
// }
|
|
// }
|
|
} catch (final Exception e) {
|
|
handleProxyException(e,conProp,respond,url);
|
|
}
|
|
}
|
|
|
|
public static void doPost(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream respond, final InputStream body) throws IOException {
|
|
assert conProp != null : "precondition violated: conProp != null";
|
|
assert requestHeader != null : "precondition violated: requestHeader != null";
|
|
assert body != null : "precondition violated: body != null";
|
|
DigestURI url = null;
|
|
ByteCountOutputStream countedRespond = null;
|
|
try {
|
|
final int reqID = requestHeader.hashCode();
|
|
// remembering the starting time of the request
|
|
final Date requestDate = new Date(); // remember the time...
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime()));
|
|
if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction();
|
|
sb.proxyLastAccess = System.currentTimeMillis();
|
|
|
|
// using an ByteCount OutputStream to count the send bytes
|
|
countedRespond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY");
|
|
|
|
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
final String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH);
|
|
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given
|
|
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
|
|
int port, pos;
|
|
if ((pos = host.indexOf(':')) < 0) {
|
|
port = 80;
|
|
} else {
|
|
port = Integer.parseInt(host.substring(pos + 1));
|
|
host = host.substring(0, pos);
|
|
}
|
|
|
|
try {
|
|
url = new DigestURI("http", host, port, (args == null) ? path : path + "?" + args);
|
|
} catch (final MalformedURLException e) {
|
|
final String errorMsg = "ERROR: internal error with url generation: host=" +
|
|
host + ", port=" + port + ", path=" + path + ", args=" + args;
|
|
log.logSevere(errorMsg);
|
|
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e);
|
|
return;
|
|
}
|
|
if (log.isFine()) log.logFine(reqID +" POST "+ url);
|
|
if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader);
|
|
|
|
prepareRequestHeader(conProp, requestHeader, host.toLowerCase());
|
|
|
|
String yAddress = resolveYacyDomains(host);
|
|
|
|
// re-calc the url path
|
|
final String remotePath = (args == null) ? path : (path + "?" + args);
|
|
|
|
// remove yacy-subdomain-path, when accessing /env
|
|
if ( (yAddress != null)
|
|
&& (remotePath.startsWith("/env"))
|
|
&& ((pos = yAddress.indexOf('/')) != -1)
|
|
) yAddress = yAddress.substring(0, yAddress.indexOf('/'));
|
|
|
|
modifyProxyHeaders(requestHeader, httpVer);
|
|
|
|
final String connectHost = hostPart(host, port, yAddress);
|
|
final String getUrl = "http://"+ connectHost + remotePath;
|
|
if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl);
|
|
|
|
// the CONTENT_LENGTH will be added by entity and cause a ClientProtocolException if set
|
|
final int contentLength = requestHeader.getContentLength();
|
|
requestHeader.remove(HeaderFramework.CONTENT_LENGTH);
|
|
|
|
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
|
|
|
|
// check input
|
|
if(body == null) {
|
|
log.logSevere("no body to POST!");
|
|
}
|
|
try {
|
|
// sending the request
|
|
client.POST(getUrl, body, contentLength);
|
|
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
|
|
|
|
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
|
|
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
|
|
// determine if it's an internal error of the httpc
|
|
if (responseHeader.isEmpty()) {
|
|
throw new Exception(client.getHttpResponse().getStatusLine().toString());
|
|
}
|
|
|
|
final ChunkedOutputStream chunked = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), countedRespond);
|
|
|
|
prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());
|
|
|
|
// sending the respond header back to the client
|
|
if (chunked != null) {
|
|
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
|
|
}
|
|
|
|
// sending response headers
|
|
if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader);
|
|
HTTPDemon.sendRespondHeader(conProp,
|
|
countedRespond,
|
|
httpVer,
|
|
statusCode,
|
|
client.getHttpResponse().getStatusLine().toString(), // status text
|
|
responseHeader);
|
|
|
|
final OutputStream outStream = (chunked != null) ? chunked : countedRespond;
|
|
client.writeTo(outStream);
|
|
|
|
if (chunked != null) {
|
|
chunked.finish();
|
|
}
|
|
outStream.flush();
|
|
} catch(final SocketException se) {
|
|
// connection closed by client, abort download
|
|
client.finish();
|
|
} finally {
|
|
client.finish();
|
|
}
|
|
} catch (final Exception e) {
|
|
handleProxyException(e,conProp,countedRespond,url);
|
|
} finally {
|
|
if(countedRespond != null) {
|
|
countedRespond.flush();
|
|
countedRespond.finish();
|
|
}
|
|
if(respond != null) {
|
|
respond.flush();
|
|
}
|
|
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_END, Long.valueOf(System.currentTimeMillis()));
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE,(countedRespond != null) ? Long.toString(countedRespond.getCount()) : "-1");
|
|
logProxyAccess(conProp);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* resolve yacy and yacyh domains
|
|
*
|
|
* @param host
|
|
* @return
|
|
*/
|
|
private static String resolveYacyDomains(final String host) {
|
|
return (HTTPDemon.getAlternativeResolver() == null) ? null : HTTPDemon.getAlternativeResolver().resolve(host);
|
|
}
|
|
|
|
/**
|
|
* @param host
|
|
* @param port
|
|
* @param yAddress
|
|
* @return
|
|
*/
|
|
private static String hostPart(final String host, final int port, final String yAddress) {
|
|
final String connectHost = (yAddress == null) ? host +":"+ port : yAddress;
|
|
return connectHost;
|
|
}
|
|
|
|
/**
|
|
* @param conProp
|
|
* @param requestHeader
|
|
* @param hostlow
|
|
*/
|
|
private static void prepareRequestHeader(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final String hostlow) {
|
|
// set another userAgent, if not yellow-listed
|
|
if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) {
|
|
// change the User-Agent
|
|
requestHeader.put(HeaderFramework.USER_AGENT, generateUserAgent(requestHeader));
|
|
}
|
|
|
|
// only gzip-encoding is supported, remove other encodings (e. g. deflate)
|
|
if ((requestHeader.get(HeaderFramework.ACCEPT_ENCODING,"")).indexOf("gzip",0) != -1) {
|
|
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, "gzip");
|
|
} else {
|
|
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, "");
|
|
}
|
|
|
|
addXForwardedForHeader(conProp, requestHeader);
|
|
}
|
|
|
|
private static String domain(final String host) {
|
|
String domain = host;
|
|
int pos = domain.lastIndexOf('.');
|
|
if (pos >= 0) {
|
|
// truncate from last part
|
|
domain = domain.substring(0, pos);
|
|
pos = domain.lastIndexOf('.');
|
|
if (pos >= 0) {
|
|
// truncate from first part
|
|
domain = domain.substring(pos + 1);
|
|
}
|
|
}
|
|
return domain;
|
|
}
|
|
|
|
/**
|
|
* creates a new HttpClient and sets parameters according to proxy needs
|
|
*
|
|
* @param requestHeader
|
|
* @param connectHost may be 'host:port' or 'host:port/path'
|
|
* @return
|
|
*/
|
|
private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) {
|
|
// setup HTTP-client
|
|
final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
|
|
client.setHeader(requestHeader.entrySet());
|
|
client.setRedirecting(false);
|
|
return client;
|
|
}
|
|
|
|
/**
|
|
* determines in which form the response should be send and sets header accordingly
|
|
* if the content length is not set we need to use chunked content encoding
|
|
* Implemented:
|
|
* if !content-length
|
|
* switch httpVer
|
|
* case 0.9:
|
|
* case 1.0:
|
|
* close connection after transfer
|
|
* break;
|
|
* default:
|
|
* new ChunkedStream around respond
|
|
* end if
|
|
*
|
|
* @param conProp
|
|
* @param responseHeader
|
|
* @param statusCode
|
|
* @param respond
|
|
* @return
|
|
*/
|
|
private static ChunkedOutputStream setTransferEncoding(
|
|
final HashMap<String, Object> conProp, final ResponseHeader responseHeader,
|
|
final int statusCode, final OutputStream respond) {
|
|
final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
ChunkedOutputStream chunkedOut = null;
|
|
// gzipped response is ungzipped an therefor the length is unknown
|
|
if (responseHeader.gzip() || responseHeader.getContentLength() < 0) {
|
|
// according to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
|
|
// a 204,304 message must not contain a message body.
|
|
// Therefore we need to set the content-length to 0.
|
|
if (statusCode == 204 || statusCode == 304) {
|
|
responseHeader.put(HeaderFramework.CONTENT_LENGTH, "0");
|
|
} else {
|
|
if (httpVer.equals(HeaderFramework.HTTP_VERSION_0_9) || httpVer.equals(HeaderFramework.HTTP_VERSION_1_0)) {
|
|
forceConnectionClose(conProp);
|
|
} else {
|
|
chunkedOut = new ChunkedOutputStream(respond);
|
|
}
|
|
responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
|
|
}
|
|
}
|
|
return chunkedOut;
|
|
}
|
|
|
|
/**
|
|
* @param res
|
|
* @param responseHeader
|
|
*/
|
|
private static void prepareResponseHeader(final ResponseHeader responseHeader, final String httpVer) {
|
|
modifyProxyHeaders(responseHeader, httpVer);
|
|
correctContentEncoding(responseHeader);
|
|
}
|
|
|
|
/**
|
|
* @param responseHeader
|
|
*/
|
|
private static void correctContentEncoding(final ResponseHeader responseHeader) {
|
|
// TODO gzip again? set "correct" encoding?
|
|
if(responseHeader.gzip()) {
|
|
responseHeader.remove(HeaderFramework.CONTENT_ENCODING);
|
|
responseHeader.remove(HeaderFramework.CONTENT_LENGTH); // remove gziped length
|
|
}
|
|
}
|
|
|
|
/**
|
|
* adds the client-IP of conProp to the requestHeader
|
|
*
|
|
* @param conProp
|
|
* @param requestHeader
|
|
*/
|
|
private static void addXForwardedForHeader(final HashMap<String, Object> conProp, final RequestHeader requestHeader) {
|
|
// setting the X-Forwarded-For Header
|
|
if (sb.getConfigBool("proxy.sendXForwardedForHeader", true)) {
|
|
requestHeader.put(HeaderFramework.X_FORWARDED_FOR, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* removing hop by hop headers and adding additional headers
|
|
*
|
|
* @param requestHeader
|
|
* @param httpVer
|
|
*/
|
|
private static void modifyProxyHeaders(final HeaderFramework requestHeader, final String httpVer) {
|
|
removeHopByHopHeaders(requestHeader);
|
|
setViaHeader(requestHeader, httpVer);
|
|
}
|
|
|
|
private static void removeHopByHopHeaders(final HeaderFramework headers) {
|
|
/*
|
|
- Trailers
|
|
*/
|
|
|
|
headers.remove(RequestHeader.CONNECTION);
|
|
headers.remove(RequestHeader.KEEP_ALIVE);
|
|
headers.remove(RequestHeader.UPGRADE);
|
|
headers.remove(RequestHeader.TE);
|
|
headers.remove(RequestHeader.PROXY_CONNECTION);
|
|
headers.remove(RequestHeader.PROXY_AUTHENTICATE);
|
|
headers.remove(RequestHeader.PROXY_AUTHORIZATION);
|
|
|
|
// special headers inserted by squid
|
|
headers.remove(RequestHeader.X_CACHE);
|
|
headers.remove(RequestHeader.X_CACHE_LOOKUP);
|
|
|
|
// remove transfer encoding header
|
|
headers.remove(HeaderFramework.TRANSFER_ENCODING);
|
|
|
|
//removing yacy status headers
|
|
headers.remove(HeaderFramework.X_YACY_KEEP_ALIVE_REQUEST_COUNT);
|
|
headers.remove(HeaderFramework.X_YACY_ORIGINAL_REQUEST_LINE);
|
|
}
|
|
|
|
private static void setViaHeader(final HeaderFramework header, final String httpVer) {
|
|
if (!sb.getConfigBool("proxy.sendViaHeader", true)) return;
|
|
final String myAddress = (HTTPDemon.getAlternativeResolver() == null) ? null : HTTPDemon.getAlternativeResolver().myAlternativeAddress();
|
|
if (myAddress != null) {
|
|
|
|
// getting header set by other proxies in the chain
|
|
final StringBuilder viaValue = new StringBuilder(80);
|
|
if (header.containsKey(HeaderFramework.VIA)) viaValue.append(header.get(HeaderFramework.VIA));
|
|
if (viaValue.length() > 0) viaValue.append(", ");
|
|
|
|
// appending info about this peer
|
|
viaValue
|
|
.append(httpVer).append(" ")
|
|
.append(myAddress).append(" ")
|
|
.append("(YaCy ").append(sb.getConfig("vString", "0.0")).append(")");
|
|
|
|
// storing header back
|
|
header.put(HeaderFramework.VIA, viaValue.toString());
|
|
}
|
|
}
|
|
|
|
public static void doConnect(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final InputStream clientIn, final OutputStream clientOut) throws IOException {
|
|
|
|
sb.proxyLastAccess = System.currentTimeMillis();
|
|
|
|
String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH);
|
|
final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS);
|
|
if (args != null) { path = path + "?" + args; }
|
|
|
|
int port, pos;
|
|
if ((pos = host.indexOf(':')) < 0) {
|
|
port = 80;
|
|
} else {
|
|
port = Integer.parseInt(host.substring(pos + 1));
|
|
host = host.substring(0, pos);
|
|
}
|
|
|
|
// check the blacklist
|
|
// blacklist idea inspired by [AS]:
|
|
// respond a 404 for all AGIS ("all you get is shit") servers
|
|
final String hostlow = host.toLowerCase();
|
|
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
|
|
HTTPDemon.sendRespondError(conProp,clientOut,4,403,null,
|
|
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
|
|
log.logInfo("AGIS blocking of host '" + hostlow + "'");
|
|
forceConnectionClose(conProp);
|
|
return;
|
|
}
|
|
|
|
// possibly branch into PROXY-PROXY connection
|
|
if (ProxySettings.useForHost(host, Protocol.HTTPS)) {
|
|
final HTTPClient remoteProxy = setupHttpClient(requestHeader, host);
|
|
|
|
try {
|
|
remoteProxy.HEADResponse("http://" + host + ":" + port);
|
|
int statusCode = remoteProxy.getHttpResponse().getStatusLine().getStatusCode();
|
|
final ResponseHeader header = new ResponseHeader(statusCode, remoteProxy.getHttpResponse().getAllHeaders());
|
|
|
|
// outputs a logline to the serverlog with the current status
|
|
log.logInfo("CONNECT-RESPONSE: status=" + remoteProxy.getHttpResponse().getStatusLine() + ", header=" + header.toString());
|
|
final boolean success = statusCode >= 200 && statusCode <= 399;
|
|
if (success) {
|
|
// replace connection details
|
|
host = ProxySettings.host;
|
|
port = ProxySettings.port;
|
|
// go on (see below)
|
|
} else {
|
|
// pass error response back to client
|
|
HTTPDemon.sendRespondHeader(
|
|
conProp,
|
|
clientOut,
|
|
httpVersion,
|
|
remoteProxy.getHttpResponse().getStatusLine().getStatusCode(),
|
|
remoteProxy.getHttpResponse().getStatusLine().toString(),
|
|
header);
|
|
//respondHeader(clientOut, response.status, response.responseHeader);
|
|
forceConnectionClose(conProp);
|
|
return;
|
|
}
|
|
} catch (final Exception e) {
|
|
throw new IOException(e.getMessage());
|
|
}
|
|
}
|
|
|
|
// try to establish connection to remote host
|
|
final Socket sslSocket = new Socket(host, port);
|
|
sslSocket.setSoTimeout(timeout); // waiting time for write
|
|
sslSocket.setSoLinger(true, timeout); // waiting time for read
|
|
final InputStream promiscuousIn = sslSocket.getInputStream();
|
|
final OutputStream promiscuousOut = sslSocket.getOutputStream();
|
|
|
|
// now then we can return a success message
|
|
clientOut.write(UTF8.getBytes(httpVersion + " 200 Connection established" + serverCore.CRLF_STRING +
|
|
"Proxy-agent: YACY" + serverCore.CRLF_STRING +
|
|
serverCore.CRLF_STRING));
|
|
|
|
log.logInfo("SSL connection to " + host + ":" + port + " established.");
|
|
|
|
// start stream passing with mediate processes
|
|
final Mediate cs = new Mediate(sslSocket, clientIn, promiscuousOut);
|
|
final Mediate sc = new Mediate(sslSocket, promiscuousIn, clientOut);
|
|
cs.start();
|
|
sc.start();
|
|
while ((sslSocket != null) &&
|
|
(sslSocket.isBound()) &&
|
|
(!(sslSocket.isClosed())) &&
|
|
(sslSocket.isConnected()) &&
|
|
((cs.isAlive()) || (sc.isAlive()))) {
|
|
// idle
|
|
try {Thread.sleep(1000);} catch (final InterruptedException e) {} // wait a while
|
|
}
|
|
// set stop mode
|
|
cs.pleaseTerminate();
|
|
sc.pleaseTerminate();
|
|
// wake up thread
|
|
cs.interrupt();
|
|
sc.interrupt();
|
|
// ...hope they have terminated...
|
|
}
|
|
|
|
public static class Mediate extends Thread {
|
|
|
|
boolean terminate;
|
|
Socket socket;
|
|
InputStream in;
|
|
OutputStream out;
|
|
|
|
public Mediate(final Socket socket, final InputStream in, final OutputStream out) {
|
|
this.terminate = false;
|
|
this.in = in;
|
|
this.out = out;
|
|
this.socket = socket;
|
|
}
|
|
|
|
@Override
|
|
public void run() {
|
|
final byte[] buffer = new byte[512];
|
|
int len;
|
|
try {
|
|
while ((this.socket != null) &&
|
|
(this.socket.isBound()) &&
|
|
(!(this.socket.isClosed())) &&
|
|
(this.socket.isConnected()) &&
|
|
(!(this.terminate)) &&
|
|
(this.in != null) &&
|
|
(this.out != null) &&
|
|
((len = this.in.read(buffer)) >= 0)
|
|
) {
|
|
this.out.write(buffer, 0, len);
|
|
}
|
|
} catch (final IOException e) {
|
|
// do nothing
|
|
} catch (final Exception e) {
|
|
Log.logException(e);
|
|
}
|
|
}
|
|
|
|
public void pleaseTerminate() {
|
|
this.terminate = true;
|
|
}
|
|
}
|
|
|
|
private static void handleProxyException(final Exception e, final HashMap<String, Object> conProp, final OutputStream respond, final DigestURI url) {
|
|
// this may happen if
|
|
// - the targeted host does not exist
|
|
// - anything with the remote server was wrong.
|
|
// - the client unexpectedly closed the connection ...
|
|
try {
|
|
|
|
|
|
// doing some errorhandling ...
|
|
int httpStatusCode = 404;
|
|
String httpStatusText = null;
|
|
String errorMessage = null;
|
|
Exception errorExc = null;
|
|
boolean unknownError = false;
|
|
|
|
// for customized error messages
|
|
boolean detailedErrorMsg = false;
|
|
String detailedErrorMsgFile = null;
|
|
serverObjects detailedErrorMsgMap = null;
|
|
|
|
if (e instanceof ConnectException) {
|
|
httpStatusCode = 403; httpStatusText = "Connection refused";
|
|
errorMessage = "Connection refused by destination host";
|
|
} else if (e instanceof BindException) {
|
|
errorMessage = "Unable to establish a connection to the destination host";
|
|
} else if (e instanceof NoRouteToHostException) {
|
|
errorMessage = "No route to destination host";
|
|
} else if (e instanceof UnknownHostException) {
|
|
//errorMessage = "IP address of the destination host could not be determined";
|
|
try {
|
|
detailedErrorMsgMap = unknownHostHandling(conProp);
|
|
httpStatusText = "Unknown Host";
|
|
detailedErrorMsg = true;
|
|
detailedErrorMsgFile = "proxymsg/unknownHost.inc";
|
|
} catch (final Exception e1) {
|
|
errorMessage = "IP address of the destination host could not be determined";
|
|
}
|
|
} else if (e instanceof SocketTimeoutException) {
|
|
errorMessage = "Unable to establish a connection to the destination host. Connect timed out.";
|
|
} else {
|
|
final String exceptionMsg = e.getMessage();
|
|
if ((exceptionMsg != null) && (exceptionMsg.indexOf("Corrupt GZIP trailer",0) >= 0)) {
|
|
// just do nothing, we leave it this way
|
|
if (log.isFine()) log.logFine("ignoring bad gzip trail for URL " + url + " (" + e.getMessage() + ")");
|
|
forceConnectionClose(conProp);
|
|
} else if ((exceptionMsg != null) && (exceptionMsg.indexOf("Connection reset",0)>= 0)) {
|
|
errorMessage = "Connection reset";
|
|
} else if ((exceptionMsg != null) && (exceptionMsg.indexOf("unknown host",0)>=0)) {
|
|
try {
|
|
detailedErrorMsgMap = unknownHostHandling(conProp);
|
|
httpStatusText = "Unknown Host";
|
|
detailedErrorMsg = true;
|
|
detailedErrorMsgFile = "proxymsg/unknownHost.inc";
|
|
} catch (final Exception e1) {
|
|
errorMessage = "IP address of the destination host could not be determined";
|
|
}
|
|
} else if ((exceptionMsg != null) &&
|
|
(
|
|
(exceptionMsg.indexOf("socket write error",0)>=0) ||
|
|
(exceptionMsg.indexOf("Read timed out",0) >= 0) ||
|
|
(exceptionMsg.indexOf("Broken pipe",0) >= 0) ||
|
|
(exceptionMsg.indexOf("server has closed connection",0) >= 0)
|
|
)) {
|
|
errorMessage = exceptionMsg;
|
|
Log.logException(e);
|
|
} else {
|
|
errorMessage = "Unexpected Error. " + e.getClass().getName() + ": " + e.getMessage();
|
|
unknownError = true;
|
|
errorExc = e;
|
|
}
|
|
}
|
|
|
|
// sending back an error message to the client
|
|
if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
|
|
if (detailedErrorMsg) {
|
|
HTTPDemon.sendRespondError(conProp,respond, httpStatusCode, httpStatusText, new File(detailedErrorMsgFile), detailedErrorMsgMap, errorExc);
|
|
} else {
|
|
HTTPDemon.sendRespondError(conProp,respond,4,httpStatusCode,httpStatusText,errorMessage,errorExc);
|
|
}
|
|
} else {
|
|
if (unknownError) {
|
|
log.logSevere("Unknown Error while processing request '" +
|
|
conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE) + "':" +
|
|
"\n" + Thread.currentThread().getName() +
|
|
"\n" + errorMessage,e);
|
|
} else {
|
|
log.logWarning("Error while processing request '" +
|
|
conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE) + "':" +
|
|
"\n" + Thread.currentThread().getName() +
|
|
"\n" + errorMessage);
|
|
}
|
|
forceConnectionClose(conProp);
|
|
}
|
|
} catch (final Exception ee) {
|
|
forceConnectionClose(conProp);
|
|
}
|
|
|
|
}
|
|
|
|
private static void forceConnectionClose(final HashMap<String, Object> conProp) {
|
|
if (conProp != null) {
|
|
conProp.put(HeaderFramework.CONNECTION_PROP_PERSISTENT,"close");
|
|
}
|
|
}
|
|
|
|
private static serverObjects unknownHostHandling(final HashMap<String, Object> conProp) throws Exception {
|
|
final serverObjects detailedErrorMsgMap = new serverObjects();
|
|
|
|
// generic toplevel domains
|
|
final HashSet<String> topLevelDomains = new HashSet<String>(Arrays.asList(new String[]{
|
|
"aero", // Fluggesellschaften/Luftfahrt
|
|
"arpa", // Einrichtung des ARPANet
|
|
"biz", // Business
|
|
"com", // Commercial
|
|
"coop", // genossenschaftliche Unternehmen
|
|
"edu", // Education
|
|
"gov", // Government
|
|
"info", // Informationsangebote
|
|
"int", // International
|
|
"jobs", // Jobangebote von Unternemen
|
|
"mil", // Military (US-Militaer)
|
|
// "museum", // Museen
|
|
"name", // Privatpersonen
|
|
"nato", // NATO (veraltet)
|
|
"net", // Net (Netzwerkbetreiber)
|
|
"org", // Organization (Nichtkommerzielle Organisation)
|
|
"pro", // Professionals
|
|
"travel", // Touristikindustrie
|
|
|
|
// some country tlds
|
|
"de",
|
|
"at",
|
|
"ch",
|
|
"it",
|
|
"uk"
|
|
}));
|
|
|
|
// getting some connection properties
|
|
String orgHostPort = "80";
|
|
String orgHostName = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
if (orgHostName == null) orgHostName = "unknown";
|
|
orgHostName = orgHostName.toLowerCase();
|
|
int pos = orgHostName.indexOf(':');
|
|
if (pos != -1) {
|
|
orgHostPort = orgHostName.substring(pos+1);
|
|
orgHostName = orgHostName.substring(0,pos);
|
|
}
|
|
String orgHostPath = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); if (orgHostPath == null) orgHostPath = "";
|
|
String orgHostArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); if (orgHostArgs == null) orgHostArgs = "";
|
|
if (orgHostArgs.length() > 0) orgHostArgs = "?" + orgHostArgs;
|
|
detailedErrorMsgMap.put("hostName", orgHostName);
|
|
|
|
// guessing hostnames
|
|
final HashSet<String> testHostNames = new HashSet<String>();
|
|
String testHostName = null;
|
|
if (!orgHostName.startsWith("www.")) {
|
|
testHostName = "www." + orgHostName;
|
|
final InetAddress addr = Domains.dnsResolve(testHostName);
|
|
if (addr != null) testHostNames.add(testHostName);
|
|
} else if (orgHostName.startsWith("www.")) {
|
|
testHostName = orgHostName.substring(4);
|
|
final InetAddress addr = Domains.dnsResolve(testHostName);
|
|
if (addr != null) if (addr != null) testHostNames.add(testHostName);
|
|
}
|
|
if (orgHostName.length()>4 && orgHostName.startsWith("www") && (orgHostName.charAt(3) != '.')) {
|
|
testHostName = orgHostName.substring(0,3) + "." + orgHostName.substring(3);
|
|
final InetAddress addr = Domains.dnsResolve(testHostName);
|
|
if (addr != null) if (addr != null) testHostNames.add(testHostName);
|
|
}
|
|
|
|
pos = orgHostName.lastIndexOf('.');
|
|
if (pos != -1) {
|
|
final Iterator<String> iter = topLevelDomains.iterator();
|
|
while (iter.hasNext()) {
|
|
final String topLevelDomain = iter.next();
|
|
testHostName = orgHostName.substring(0,pos) + "." + topLevelDomain;
|
|
final InetAddress addr = Domains.dnsResolve(testHostName);
|
|
if (addr != null) if (addr != null) testHostNames.add(testHostName);
|
|
}
|
|
}
|
|
|
|
int hostNameCount = 0;
|
|
final Iterator<String> iter = testHostNames.iterator();
|
|
while (iter.hasNext()) {
|
|
testHostName = iter.next();
|
|
detailedErrorMsgMap.put("list_" + hostNameCount + "_hostName",testHostName);
|
|
detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPort",orgHostPort);
|
|
detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPath",orgHostPath);
|
|
detailedErrorMsgMap.put("list_" + hostNameCount + "_hostArgs",orgHostArgs);
|
|
hostNameCount++;
|
|
}
|
|
|
|
detailedErrorMsgMap.put("list", hostNameCount);
|
|
|
|
if (hostNameCount != 0) {
|
|
detailedErrorMsgMap.put("showList", 1);
|
|
} else {
|
|
detailedErrorMsgMap.put("showList", 0);
|
|
}
|
|
|
|
return detailedErrorMsgMap;
|
|
}
|
|
|
|
private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) {
|
|
userAgentStr.setLength(0);
|
|
|
|
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyProxyUserAgent);
|
|
final int pos = browserUserAgent.lastIndexOf(')');
|
|
if (pos >= 0) {
|
|
userAgentStr
|
|
.append(browserUserAgent.substring(0,pos))
|
|
.append("; YaCy ")
|
|
.append(sb.getConfig("vString","0.1"))
|
|
.append("; yacy.net")
|
|
.append(browserUserAgent.substring(pos));
|
|
} else {
|
|
userAgentStr.append(browserUserAgent);
|
|
}
|
|
|
|
return userAgentStr.toString();
|
|
}
|
|
|
|
/**
|
|
* This function is used to generate a logging message according to the
|
|
* <a href="http://www.squid-cache.org/Doc/FAQ/FAQ-6.html">squid logging format</a>.<p>
|
|
* e.g.<br>
|
|
* <code>1117528623.857 178 192.168.1.201 TCP_MISS/200 1069 GET http://www.yacy.de/ - DIRECT/81.169.145.74 text/html</code>
|
|
*/
|
|
private final static synchronized void logProxyAccess(final HashMap<String, Object> conProp) {
|
|
|
|
if (!doAccessLogging) return;
|
|
|
|
logMessage.setLength(0);
|
|
|
|
// Timestamp
|
|
final String currentTimestamp = Long.toString(System.currentTimeMillis());
|
|
final int offset = currentTimestamp.length()-3;
|
|
|
|
logMessage.append(currentTimestamp.substring(0,offset));
|
|
logMessage.append('.');
|
|
logMessage.append(currentTimestamp.substring(offset));
|
|
logMessage.append(' ');
|
|
|
|
// Elapsed time
|
|
final Long requestStart = (Long) conProp.get(HeaderFramework.CONNECTION_PROP_REQUEST_START);
|
|
final Long requestEnd = (Long) conProp.get(HeaderFramework.CONNECTION_PROP_REQUEST_END);
|
|
final String elapsed = Long.toString(requestEnd.longValue()-requestStart.longValue());
|
|
|
|
for (int i=0; i<6-elapsed.length(); i++) logMessage.append(' ');
|
|
logMessage.append(elapsed);
|
|
logMessage.append(' ');
|
|
|
|
// Remote Host
|
|
final String clientIP = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP);
|
|
logMessage.append(clientIP);
|
|
logMessage.append(' ');
|
|
|
|
// Code/Status
|
|
final String respondStatus = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS);
|
|
String respondCode = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE);
|
|
if (respondCode == null) respondCode = "UNKNOWN";
|
|
logMessage.append(respondCode);
|
|
logMessage.append("/");
|
|
logMessage.append(respondStatus);
|
|
logMessage.append(' ');
|
|
|
|
// Bytes
|
|
final String bytes = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE);
|
|
logMessage.append(bytes.toString());
|
|
logMessage.append(' ');
|
|
|
|
// Method
|
|
final String requestMethod = (String) conProp.get(HeaderFramework.CONNECTION_PROP_METHOD);
|
|
logMessage.append(requestMethod);
|
|
logMessage.append(' ');
|
|
|
|
// URL
|
|
final String requestURL = (String) conProp.get(HeaderFramework.CONNECTION_PROP_URL);
|
|
final String requestArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS);
|
|
logMessage.append(requestURL);
|
|
if (requestArgs != null) {
|
|
logMessage.append("?")
|
|
.append(requestArgs);
|
|
}
|
|
logMessage.append(' ');
|
|
|
|
// Rfc931
|
|
logMessage.append("-");
|
|
logMessage.append(' ');
|
|
|
|
// Peerstatus/Peerhost
|
|
final String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
|
|
logMessage.append("DIRECT/");
|
|
logMessage.append(host);
|
|
logMessage.append(' ');
|
|
|
|
// Type
|
|
String mime = "-";
|
|
if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
|
|
final HeaderFramework proxyRespondHeader = (HeaderFramework) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER);
|
|
mime = proxyRespondHeader.mime();
|
|
if (mime.indexOf(';') != -1) {
|
|
mime = mime.substring(0,mime.indexOf(';'));
|
|
}
|
|
}
|
|
logMessage.append(mime);
|
|
|
|
// sending the logging message to the logger
|
|
if (proxyLog.isFine()) proxyLog.logFine(logMessage.toString());
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
proxy test:
|
|
|
|
http://www.chipchapin.com/WebTools/cookietest.php?
|
|
http://xlists.aza.org/moderator/cookietest/cookietest1.php
|
|
http://vancouver-webpages.com/proxy/cache-test.html
|
|
|
|
*/
|