mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
some generalization of remote proxy configuration and setting handling in httpc
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4023 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
fac8de6e9b
commit
57a5b6fa71
|
@ -3,7 +3,7 @@ javacSource=1.4
|
|||
javacTarget=1.4
|
||||
|
||||
# Release Configuration
|
||||
releaseVersion=0.54
|
||||
releaseVersion=0.541
|
||||
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||
releaseFileParentDir=yacy
|
||||
|
|
|
@ -512,7 +512,8 @@ public class CrawlURLFetch_p {
|
|||
url.getHost(),
|
||||
url.getPort(),
|
||||
15000,
|
||||
url.getProtocol().equals("https"));
|
||||
url.getProtocol().equals("https"),
|
||||
plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
|
||||
|
||||
httpHeader header = new httpHeader();
|
||||
header.put(httpHeader.ACCEPT_ENCODING, "US-ASCII");
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
|
||||
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
|
||||
<ShortName>YaCy/#[clientname]#</ShortName>
|
||||
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
|
||||
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
|
||||
|
@ -15,6 +15,6 @@
|
|||
<Query role="example" searchTerms="yacy" />
|
||||
<Tags>YaCy P2P Web Search</Tags>
|
||||
<Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
|
||||
<Attribution>YaCy Software &copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
|
||||
<Attribution>YaCy Software &copy; 2004-2007 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
|
||||
<SyndicationRight>open</SyndicationRight>
|
||||
</OpenSearchDescription>
|
|
@ -180,7 +180,8 @@ public class SitemapParser extends DefaultHandler {
|
|||
this.siteMapURL.getHost(),
|
||||
this.siteMapURL.getPort(),
|
||||
5000,
|
||||
this.siteMapURL.getProtocol().equalsIgnoreCase("https"));
|
||||
this.siteMapURL.getProtocol().equalsIgnoreCase("https"),
|
||||
switchboard.remoteProxyConfig);
|
||||
|
||||
httpc.response res = remote.GET(this.siteMapURL.getFile(), null);
|
||||
if (res.statusCode != 200) {
|
||||
|
|
|
@ -386,15 +386,7 @@ public final class robotsParser{
|
|||
downloadStart = System.currentTimeMillis();
|
||||
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
|
||||
//TODO: add traffic statistics for the robots.txt download?
|
||||
if (
|
||||
(sb == null) ||
|
||||
(sb.remoteProxyConfig == null) ||
|
||||
(!sb.remoteProxyConfig.useProxy())
|
||||
) {
|
||||
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"));
|
||||
} else {
|
||||
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
|
||||
}
|
||||
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
|
||||
|
||||
// if we have previously downloaded this robots.txt, we can set the If-Modified-Since header
|
||||
httpHeader reqHeaders = new httpHeader();
|
||||
|
|
|
@ -308,62 +308,7 @@ public final class httpc {
|
|||
boolean ssl,
|
||||
httpRemoteProxyConfig remoteProxyConfig
|
||||
) throws IOException {
|
||||
if (remoteProxyConfig == null) throw new NullPointerException("Proxy object must not be null.");
|
||||
|
||||
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
|
||||
}
|
||||
|
||||
public static httpc getInstance(
|
||||
String server,
|
||||
String vhost,
|
||||
int port,
|
||||
int timeout,
|
||||
boolean ssl
|
||||
) throws IOException {
|
||||
return getInstance(server,vhost,port,timeout,ssl,null,null);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method gets a new httpc instance from the object pool and
|
||||
* initializes it with the given parameters.
|
||||
*
|
||||
* @param server
|
||||
* @param port
|
||||
* @param timeout
|
||||
* @param ssl
|
||||
* @throws IOException
|
||||
* @see httpc#init
|
||||
*/
|
||||
public static httpc getInstance(
|
||||
String server,
|
||||
String vhost,
|
||||
int port,
|
||||
int timeout,
|
||||
boolean ssl,
|
||||
String incomingByteCountAccounting,
|
||||
String outgoingByteCountAccounting
|
||||
) throws IOException {
|
||||
|
||||
httpc newHttpc = null;
|
||||
// fetching a new httpc from the object pool
|
||||
try {
|
||||
newHttpc = (httpc) httpc.theHttpcPool.borrowObject();
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Unable to fetch a new httpc from pool. " + e.getMessage());
|
||||
}
|
||||
|
||||
// initialize it
|
||||
try {
|
||||
newHttpc.init(server,vhost,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
|
||||
} catch (IOException e) {
|
||||
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
|
||||
throw e;
|
||||
}
|
||||
return newHttpc;
|
||||
|
||||
|
||||
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -439,7 +384,7 @@ public final class httpc {
|
|||
* @param remoteProxyPort
|
||||
* @throws IOException
|
||||
*/
|
||||
void init(
|
||||
private void init(
|
||||
String server,
|
||||
String vhost,
|
||||
int port,
|
||||
|
@ -450,6 +395,20 @@ public final class httpc {
|
|||
String outgoingByteCountAccounting
|
||||
) throws IOException {
|
||||
|
||||
if ((theRemoteProxyConfig == null) ||
|
||||
(!theRemoteProxyConfig.useProxy())) {
|
||||
initN(
|
||||
server,
|
||||
vhost,
|
||||
port,
|
||||
timeout,
|
||||
ssl,
|
||||
incomingByteCountAccounting,
|
||||
outgoingByteCountAccounting
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (port == -1) {
|
||||
port = (ssl)? 443 : 80;
|
||||
}
|
||||
|
@ -457,7 +416,14 @@ public final class httpc {
|
|||
String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
|
||||
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
|
||||
|
||||
this.init(remoteProxyHost, vhost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
|
||||
this.initN(
|
||||
remoteProxyHost,
|
||||
vhost,
|
||||
remoteProxyPort,
|
||||
timeout,
|
||||
ssl,
|
||||
incomingByteCountAccounting,
|
||||
outgoingByteCountAccounting);
|
||||
|
||||
this.remoteProxyUse = true;
|
||||
this.adressed_host = server;
|
||||
|
@ -476,7 +442,7 @@ public final class httpc {
|
|||
* @param ssl Whether we should use SSL.
|
||||
* @throws IOException
|
||||
*/
|
||||
void init(
|
||||
private void initN(
|
||||
String server,
|
||||
String vhost,
|
||||
int port,
|
||||
|
@ -968,11 +934,7 @@ public final class httpc {
|
|||
|
||||
httpc con = null;
|
||||
try {
|
||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
|
||||
} else {
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
}
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
|
||||
httpc.response res = con.GET(path, requestHeader);
|
||||
if (res.status.startsWith("2")) {
|
||||
|
@ -1036,11 +998,7 @@ public final class httpc {
|
|||
|
||||
httpc con = null;
|
||||
try {
|
||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
|
||||
} else {
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
}
|
||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
httpc.response res = con.POST(path, requestHeader, props, files);
|
||||
|
||||
//System.out.println("response=" + res.toString());
|
||||
|
@ -1198,10 +1156,7 @@ public final class httpc {
|
|||
// start connection
|
||||
httpc con = null;
|
||||
try {
|
||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
|
||||
con = httpc.getInstance(realhost, vhost, port, timeout, ssl);
|
||||
else con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
|
||||
con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||
httpc.response res = con.HEAD(path, requestHeader);
|
||||
if (res.status.startsWith("2")) {
|
||||
// success
|
||||
|
|
|
@ -1265,15 +1265,13 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
|
||||
|
||||
// a new httpc connection, combined with possible remote proxy
|
||||
boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());
|
||||
|
||||
// check no-proxy rule
|
||||
if (
|
||||
(switchboard.remoteProxyConfig != null) &&
|
||||
(switchboard.remoteProxyConfig.useProxy()) &&
|
||||
(!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
|
||||
if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
|
||||
useProxy = false;
|
||||
(remProxyConfig != null) &&
|
||||
(remProxyConfig.useProxy()) &&
|
||||
(!(remProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
|
||||
if (remProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
|
||||
remProxyConfig = null;
|
||||
} else {
|
||||
// analyse remoteProxyNoProxy;
|
||||
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
|
||||
|
@ -1282,7 +1280,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
|
||||
// disallow proxy for this server
|
||||
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
|
||||
useProxy = false;
|
||||
remProxyConfig = null;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
|
@ -1295,8 +1293,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
}
|
||||
|
||||
// branch to server/proxy
|
||||
if (useProxy) {
|
||||
return httpc.getInstance(
|
||||
return httpc.getInstance(
|
||||
server,
|
||||
server,
|
||||
port,
|
||||
|
@ -1304,14 +1301,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
false,
|
||||
remProxyConfig
|
||||
);
|
||||
}
|
||||
return httpc.getInstance(
|
||||
server,
|
||||
server,
|
||||
port,
|
||||
timeout,
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
private httpc newhttpc(String address, int timeout) throws IOException {
|
||||
|
|
|
@ -117,7 +117,7 @@ public class kelondroMSetTools {
|
|||
k = (Long) orderMap.firstKey(); // the next smallest...
|
||||
mapA = joinResult;
|
||||
mapB = (TreeMap) orderMap.remove(k);
|
||||
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings);
|
||||
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings); // TODO: better with enumeration?
|
||||
// free resources
|
||||
mapA = null;
|
||||
mapB = null;
|
||||
|
|
|
@ -196,9 +196,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
|
|||
requestHeader.put(httpHeader.ACCEPT_ENCODING, this.acceptEncoding);
|
||||
|
||||
// open the connection
|
||||
remote = ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy()))
|
||||
? httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null)
|
||||
: httpc.getInstance(host, host, port, this.socketTimeout, ssl, "CRAWLER",null);
|
||||
remote = httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null);
|
||||
|
||||
// specifying if content encoding is allowed
|
||||
remote.setAllowContentEncoding((this.acceptEncoding != null && this.acceptEncoding.length() > 0));
|
||||
|
|
|
@ -424,7 +424,7 @@ public final class plasmaCrawlLURL {
|
|||
URL newUrl = new URL(newUrlStr);
|
||||
|
||||
// do an HTTP HEAD request to test whether the URL is correct
|
||||
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false);
|
||||
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
|
||||
response res = theHttpc.HEAD(newUrl.getPath(), null);
|
||||
|
||||
if (res.statusCode == 200) {
|
||||
|
|
|
@ -919,7 +919,8 @@ public final class plasmaParser {
|
|||
contentURL.getHost(),
|
||||
contentURL.getPort(),
|
||||
5000,
|
||||
contentURL.getProtocol().equalsIgnoreCase("https"));
|
||||
contentURL.getProtocol().equalsIgnoreCase("https"),
|
||||
null);
|
||||
|
||||
httpc.response res = remote.GET(contentURL.getFile(), null);
|
||||
if (res.statusCode != 200) {
|
||||
|
|
|
@ -763,22 +763,13 @@ public final class yacySeedDB {
|
|||
httpc remote = null;
|
||||
try {
|
||||
// init httpc
|
||||
if ((sb.remoteProxyConfig == null)||(!sb.remoteProxyConfig.useProxy())) {
|
||||
remote = httpc.getInstance(
|
||||
seedURL.getHost(),
|
||||
seedURL.getHost(),
|
||||
seedURL.getPort(),
|
||||
10000,
|
||||
seedURL.getProtocol().equalsIgnoreCase("https"));
|
||||
} else {
|
||||
remote = httpc.getInstance(
|
||||
remote = httpc.getInstance(
|
||||
seedURL.getHost(),
|
||||
seedURL.getHost(),
|
||||
seedURL.getPort(),
|
||||
10000,
|
||||
seedURL.getProtocol().equalsIgnoreCase("https"),
|
||||
sb.remoteProxyConfig);
|
||||
}
|
||||
sb.remoteProxyConfig);
|
||||
|
||||
// Configure http headers
|
||||
httpHeader reqHeader = new httpHeader();
|
||||
|
|
|
@ -514,7 +514,7 @@ public final class yacy {
|
|||
httpHeader requestHeader = new httpHeader();
|
||||
requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
|
||||
try {
|
||||
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false);
|
||||
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false, null);
|
||||
httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);
|
||||
|
||||
// read response
|
||||
|
|
Loading…
Reference in New Issue
Block a user