Some generalization of remote proxy configuration and settings handling in httpc

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4023 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2007-08-02 00:42:37 +00:00
parent fac8de6e9b
commit 57a5b6fa71
13 changed files with 51 additions and 123 deletions

View File

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.54
releaseVersion=0.541
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

View File

@ -512,7 +512,8 @@ public class CrawlURLFetch_p {
url.getHost(),
url.getPort(),
15000,
url.getProtocol().equals("https"));
url.getProtocol().equals("https"),
plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
httpHeader header = new httpHeader();
header.put(httpHeader.ACCEPT_ENCODING, "US-ASCII");

View File

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
<ShortName>YaCy/#[clientname]#</ShortName>
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
@ -15,6 +15,6 @@
<Query role="example" searchTerms="yacy" />
<Tags>YaCy P2P Web Search</Tags>
<Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
<Attribution>YaCy Software &amp;copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
<Attribution>YaCy Software &amp;copy; 2004-2007 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
<SyndicationRight>open</SyndicationRight>
</OpenSearchDescription>

View File

@ -180,7 +180,8 @@ public class SitemapParser extends DefaultHandler {
this.siteMapURL.getHost(),
this.siteMapURL.getPort(),
5000,
this.siteMapURL.getProtocol().equalsIgnoreCase("https"));
this.siteMapURL.getProtocol().equalsIgnoreCase("https"),
switchboard.remoteProxyConfig);
httpc.response res = remote.GET(this.siteMapURL.getFile(), null);
if (res.statusCode != 200) {

View File

@ -386,15 +386,7 @@ public final class robotsParser{
downloadStart = System.currentTimeMillis();
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
//TODO: adding Traffic statistic for robots download?
if (
(sb == null) ||
(sb.remoteProxyConfig == null) ||
(!sb.remoteProxyConfig.useProxy())
) {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"));
} else {
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
}
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
httpHeader reqHeaders = new httpHeader();

View File

@ -308,62 +308,7 @@ public final class httpc {
boolean ssl,
httpRemoteProxyConfig remoteProxyConfig
) throws IOException {
if (remoteProxyConfig == null) throw new NullPointerException("Proxy object must not be null.");
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
}
public static httpc getInstance(
String server,
String vhost,
int port,
int timeout,
boolean ssl
) throws IOException {
return getInstance(server,vhost,port,timeout,ssl,null,null);
}
/**
* This method gets a new httpc instance from the object pool and
* initializes it with the given parameters.
*
* @param server
* @param port
* @param timeout
* @param ssl
* @throws IOException
* @see httpc#init
*/
public static httpc getInstance(
String server,
String vhost,
int port,
int timeout,
boolean ssl,
String incomingByteCountAccounting,
String outgoingByteCountAccounting
) throws IOException {
httpc newHttpc = null;
// fetching a new httpc from the object pool
try {
newHttpc = (httpc) httpc.theHttpcPool.borrowObject();
} catch (Exception e) {
throw new IOException("Unable to fetch a new httpc from pool. " + e.getMessage());
}
// initialize it
try {
newHttpc.init(server,vhost,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
} catch (IOException e) {
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
throw e;
}
return newHttpc;
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
}
/**
@ -439,7 +384,7 @@ public final class httpc {
* @param remoteProxyPort
* @throws IOException
*/
void init(
private void init(
String server,
String vhost,
int port,
@ -450,6 +395,20 @@ public final class httpc {
String outgoingByteCountAccounting
) throws IOException {
if ((theRemoteProxyConfig == null) ||
(!theRemoteProxyConfig.useProxy())) {
initN(
server,
vhost,
port,
timeout,
ssl,
incomingByteCountAccounting,
outgoingByteCountAccounting
);
return;
}
if (port == -1) {
port = (ssl)? 443 : 80;
}
@ -457,7 +416,14 @@ public final class httpc {
String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
this.init(remoteProxyHost, vhost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
this.initN(
remoteProxyHost,
vhost,
remoteProxyPort,
timeout,
ssl,
incomingByteCountAccounting,
outgoingByteCountAccounting);
this.remoteProxyUse = true;
this.adressed_host = server;
@ -476,7 +442,7 @@ public final class httpc {
* @param ssl Whether we should use SSL.
* @throws IOException
*/
void init(
private void initN(
String server,
String vhost,
int port,
@ -968,11 +934,7 @@ public final class httpc {
httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
} else {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
}
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.GET(path, requestHeader);
if (res.status.startsWith("2")) {
@ -1036,11 +998,7 @@ public final class httpc {
httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
} else {
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
}
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.POST(path, requestHeader, props, files);
//System.out.println("response=" + res.toString());
@ -1198,10 +1156,7 @@ public final class httpc {
// start connection
httpc con = null;
try {
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
con = httpc.getInstance(realhost, vhost, port, timeout, ssl);
else con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
httpc.response res = con.HEAD(path, requestHeader);
if (res.status.startsWith("2")) {
// success

View File

@ -1265,15 +1265,13 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
// a new httpc connection, combined with possible remote proxy
boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());
// check no-proxy rule
if (
(switchboard.remoteProxyConfig != null) &&
(switchboard.remoteProxyConfig.useProxy()) &&
(!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
useProxy = false;
(remProxyConfig != null) &&
(remProxyConfig.useProxy()) &&
(!(remProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
if (remProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
remProxyConfig = null;
} else {
// analyse remoteProxyNoProxy;
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
@ -1282,7 +1280,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
// disallow proxy for this server
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
useProxy = false;
remProxyConfig = null;
break;
}
i++;
@ -1295,8 +1293,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
}
// branch to server/proxy
if (useProxy) {
return httpc.getInstance(
return httpc.getInstance(
server,
server,
port,
@ -1304,14 +1301,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
false,
remProxyConfig
);
}
return httpc.getInstance(
server,
server,
port,
timeout,
false
);
}
private httpc newhttpc(String address, int timeout) throws IOException {

View File

@ -117,7 +117,7 @@ public class kelondroMSetTools {
k = (Long) orderMap.firstKey(); // the next smallest...
mapA = joinResult;
mapB = (TreeMap) orderMap.remove(k);
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings);
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings); // TODO: better with enumeration?
// free resources
mapA = null;
mapB = null;

View File

@ -196,9 +196,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
requestHeader.put(httpHeader.ACCEPT_ENCODING, this.acceptEncoding);
// open the connection
remote = ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy()))
? httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null)
: httpc.getInstance(host, host, port, this.socketTimeout, ssl, "CRAWLER",null);
remote = httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null);
// specifying if content encoding is allowed
remote.setAllowContentEncoding((this.acceptEncoding != null && this.acceptEncoding.length() > 0));

View File

@ -424,7 +424,7 @@ public final class plasmaCrawlLURL {
URL newUrl = new URL(newUrlStr);
// doing a http head request to test if the url is correct
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false);
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
response res = theHttpc.HEAD(newUrl.getPath(), null);
if (res.statusCode == 200) {

View File

@ -919,7 +919,8 @@ public final class plasmaParser {
contentURL.getHost(),
contentURL.getPort(),
5000,
contentURL.getProtocol().equalsIgnoreCase("https"));
contentURL.getProtocol().equalsIgnoreCase("https"),
null);
httpc.response res = remote.GET(contentURL.getFile(), null);
if (res.statusCode != 200) {

View File

@ -763,22 +763,13 @@ public final class yacySeedDB {
httpc remote = null;
try {
// init httpc
if ((sb.remoteProxyConfig == null)||(!sb.remoteProxyConfig.useProxy())) {
remote = httpc.getInstance(
seedURL.getHost(),
seedURL.getHost(),
seedURL.getPort(),
10000,
seedURL.getProtocol().equalsIgnoreCase("https"));
} else {
remote = httpc.getInstance(
remote = httpc.getInstance(
seedURL.getHost(),
seedURL.getHost(),
seedURL.getPort(),
10000,
seedURL.getProtocol().equalsIgnoreCase("https"),
sb.remoteProxyConfig);
}
sb.remoteProxyConfig);
// Configure http headers
httpHeader reqHeader = new httpHeader();

View File

@ -514,7 +514,7 @@ public final class yacy {
httpHeader requestHeader = new httpHeader();
requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
try {
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false);
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false, null);
httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);
// read response