mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
some generalization of remote proxy configuration and setting handling in httpc
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4023 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
fac8de6e9b
commit
57a5b6fa71
|
@ -3,7 +3,7 @@ javacSource=1.4
|
||||||
javacTarget=1.4
|
javacTarget=1.4
|
||||||
|
|
||||||
# Release Configuration
|
# Release Configuration
|
||||||
releaseVersion=0.54
|
releaseVersion=0.541
|
||||||
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||||
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
|
||||||
releaseFileParentDir=yacy
|
releaseFileParentDir=yacy
|
||||||
|
|
|
@ -512,7 +512,8 @@ public class CrawlURLFetch_p {
|
||||||
url.getHost(),
|
url.getHost(),
|
||||||
url.getPort(),
|
url.getPort(),
|
||||||
15000,
|
15000,
|
||||||
url.getProtocol().equals("https"));
|
url.getProtocol().equals("https"),
|
||||||
|
plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
|
||||||
|
|
||||||
httpHeader header = new httpHeader();
|
httpHeader header = new httpHeader();
|
||||||
header.put(httpHeader.ACCEPT_ENCODING, "US-ASCII");
|
header.put(httpHeader.ACCEPT_ENCODING, "US-ASCII");
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
|
<OpenSearchDescription xmlns="http://www.opensearch.org/Specifications/OpenSearch/1.1">
|
||||||
<ShortName>YaCy/#[clientname]#</ShortName>
|
<ShortName>YaCy/#[clientname]#</ShortName>
|
||||||
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
|
<LongName>YaCy.net - #[SearchPageGreeting]#</LongName>
|
||||||
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
|
<Image type="image/gif">http://#[thisaddress]#/env/grafics/yacy.gif</Image>
|
||||||
|
@ -15,6 +15,6 @@
|
||||||
<Query role="example" searchTerms="yacy" />
|
<Query role="example" searchTerms="yacy" />
|
||||||
<Tags>YaCy P2P Web Search</Tags>
|
<Tags>YaCy P2P Web Search</Tags>
|
||||||
<Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
|
<Contact>See http://#[thisaddress]#/ViewProfile.html?hash=localhash</Contact>
|
||||||
<Attribution>YaCy Software &copy; 2004-2006 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
|
<Attribution>YaCy Software &copy; 2004-2007 by Michael Christen et al., YaCy.net; Content: ask peer owner</Attribution>
|
||||||
<SyndicationRight>open</SyndicationRight>
|
<SyndicationRight>open</SyndicationRight>
|
||||||
</OpenSearchDescription>
|
</OpenSearchDescription>
|
|
@ -180,7 +180,8 @@ public class SitemapParser extends DefaultHandler {
|
||||||
this.siteMapURL.getHost(),
|
this.siteMapURL.getHost(),
|
||||||
this.siteMapURL.getPort(),
|
this.siteMapURL.getPort(),
|
||||||
5000,
|
5000,
|
||||||
this.siteMapURL.getProtocol().equalsIgnoreCase("https"));
|
this.siteMapURL.getProtocol().equalsIgnoreCase("https"),
|
||||||
|
switchboard.remoteProxyConfig);
|
||||||
|
|
||||||
httpc.response res = remote.GET(this.siteMapURL.getFile(), null);
|
httpc.response res = remote.GET(this.siteMapURL.getFile(), null);
|
||||||
if (res.statusCode != 200) {
|
if (res.statusCode != 200) {
|
||||||
|
|
|
@ -386,15 +386,7 @@ public final class robotsParser{
|
||||||
downloadStart = System.currentTimeMillis();
|
downloadStart = System.currentTimeMillis();
|
||||||
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
|
plasmaSwitchboard sb = plasmaSwitchboard.getSwitchboard();
|
||||||
//TODO: adding Traffic statistic for robots download?
|
//TODO: adding Traffic statistic for robots download?
|
||||||
if (
|
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
|
||||||
(sb == null) ||
|
|
||||||
(sb.remoteProxyConfig == null) ||
|
|
||||||
(!sb.remoteProxyConfig.useProxy())
|
|
||||||
) {
|
|
||||||
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"));
|
|
||||||
} else {
|
|
||||||
con = httpc.getInstance(robotsURL.getHost(), robotsURL.getHost(), robotsURL.getPort(), 10000, robotsURL.getProtocol().equalsIgnoreCase("https"), sb.remoteProxyConfig);
|
|
||||||
}
|
|
||||||
|
|
||||||
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
|
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
|
||||||
httpHeader reqHeaders = new httpHeader();
|
httpHeader reqHeaders = new httpHeader();
|
||||||
|
|
|
@ -308,62 +308,7 @@ public final class httpc {
|
||||||
boolean ssl,
|
boolean ssl,
|
||||||
httpRemoteProxyConfig remoteProxyConfig
|
httpRemoteProxyConfig remoteProxyConfig
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
if (remoteProxyConfig == null) throw new NullPointerException("Proxy object must not be null.");
|
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
|
||||||
|
|
||||||
return getInstance(server,vhost,port,timeout,ssl,remoteProxyConfig,null,null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static httpc getInstance(
|
|
||||||
String server,
|
|
||||||
String vhost,
|
|
||||||
int port,
|
|
||||||
int timeout,
|
|
||||||
boolean ssl
|
|
||||||
) throws IOException {
|
|
||||||
return getInstance(server,vhost,port,timeout,ssl,null,null);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This method gets a new httpc instance from the object pool and
|
|
||||||
* initializes it with the given parameters.
|
|
||||||
*
|
|
||||||
* @param server
|
|
||||||
* @param port
|
|
||||||
* @param timeout
|
|
||||||
* @param ssl
|
|
||||||
* @throws IOException
|
|
||||||
* @see httpc#init
|
|
||||||
*/
|
|
||||||
public static httpc getInstance(
|
|
||||||
String server,
|
|
||||||
String vhost,
|
|
||||||
int port,
|
|
||||||
int timeout,
|
|
||||||
boolean ssl,
|
|
||||||
String incomingByteCountAccounting,
|
|
||||||
String outgoingByteCountAccounting
|
|
||||||
) throws IOException {
|
|
||||||
|
|
||||||
httpc newHttpc = null;
|
|
||||||
// fetching a new httpc from the object pool
|
|
||||||
try {
|
|
||||||
newHttpc = (httpc) httpc.theHttpcPool.borrowObject();
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new IOException("Unable to fetch a new httpc from pool. " + e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
// initialize it
|
|
||||||
try {
|
|
||||||
newHttpc.init(server,vhost,port,timeout,ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
|
|
||||||
} catch (IOException e) {
|
|
||||||
try{ httpc.theHttpcPool.returnObject(newHttpc); } catch (Exception e1) {}
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
return newHttpc;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -439,7 +384,7 @@ public final class httpc {
|
||||||
* @param remoteProxyPort
|
* @param remoteProxyPort
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
void init(
|
private void init(
|
||||||
String server,
|
String server,
|
||||||
String vhost,
|
String vhost,
|
||||||
int port,
|
int port,
|
||||||
|
@ -450,6 +395,20 @@ public final class httpc {
|
||||||
String outgoingByteCountAccounting
|
String outgoingByteCountAccounting
|
||||||
) throws IOException {
|
) throws IOException {
|
||||||
|
|
||||||
|
if ((theRemoteProxyConfig == null) ||
|
||||||
|
(!theRemoteProxyConfig.useProxy())) {
|
||||||
|
initN(
|
||||||
|
server,
|
||||||
|
vhost,
|
||||||
|
port,
|
||||||
|
timeout,
|
||||||
|
ssl,
|
||||||
|
incomingByteCountAccounting,
|
||||||
|
outgoingByteCountAccounting
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (port == -1) {
|
if (port == -1) {
|
||||||
port = (ssl)? 443 : 80;
|
port = (ssl)? 443 : 80;
|
||||||
}
|
}
|
||||||
|
@ -457,7 +416,14 @@ public final class httpc {
|
||||||
String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
|
String remoteProxyHost = theRemoteProxyConfig.getProxyHost();
|
||||||
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
|
int remoteProxyPort = theRemoteProxyConfig.getProxyPort();
|
||||||
|
|
||||||
this.init(remoteProxyHost, vhost, remoteProxyPort, timeout, ssl,incomingByteCountAccounting,outgoingByteCountAccounting);
|
this.initN(
|
||||||
|
remoteProxyHost,
|
||||||
|
vhost,
|
||||||
|
remoteProxyPort,
|
||||||
|
timeout,
|
||||||
|
ssl,
|
||||||
|
incomingByteCountAccounting,
|
||||||
|
outgoingByteCountAccounting);
|
||||||
|
|
||||||
this.remoteProxyUse = true;
|
this.remoteProxyUse = true;
|
||||||
this.adressed_host = server;
|
this.adressed_host = server;
|
||||||
|
@ -476,7 +442,7 @@ public final class httpc {
|
||||||
* @param ssl Whether we should use SSL.
|
* @param ssl Whether we should use SSL.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
void init(
|
private void initN(
|
||||||
String server,
|
String server,
|
||||||
String vhost,
|
String vhost,
|
||||||
int port,
|
int port,
|
||||||
|
@ -968,11 +934,7 @@ public final class httpc {
|
||||||
|
|
||||||
httpc con = null;
|
httpc con = null;
|
||||||
try {
|
try {
|
||||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
|
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
|
|
||||||
} else {
|
|
||||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
|
||||||
}
|
|
||||||
|
|
||||||
httpc.response res = con.GET(path, requestHeader);
|
httpc.response res = con.GET(path, requestHeader);
|
||||||
if (res.status.startsWith("2")) {
|
if (res.status.startsWith("2")) {
|
||||||
|
@ -1036,11 +998,7 @@ public final class httpc {
|
||||||
|
|
||||||
httpc con = null;
|
httpc con = null;
|
||||||
try {
|
try {
|
||||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy())) {
|
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl);
|
|
||||||
} else {
|
|
||||||
con = httpc.getInstance(realhost, virtualhost, port, timeout, ssl, theRemoteProxyConfig);
|
|
||||||
}
|
|
||||||
httpc.response res = con.POST(path, requestHeader, props, files);
|
httpc.response res = con.POST(path, requestHeader, props, files);
|
||||||
|
|
||||||
//System.out.println("response=" + res.toString());
|
//System.out.println("response=" + res.toString());
|
||||||
|
@ -1198,10 +1156,7 @@ public final class httpc {
|
||||||
// start connection
|
// start connection
|
||||||
httpc con = null;
|
httpc con = null;
|
||||||
try {
|
try {
|
||||||
if ((theRemoteProxyConfig == null)||(!theRemoteProxyConfig.useProxy()))
|
con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
|
||||||
con = httpc.getInstance(realhost, vhost, port, timeout, ssl);
|
|
||||||
else con = httpc.getInstance(realhost, vhost, port, timeout, ssl, theRemoteProxyConfig);
|
|
||||||
|
|
||||||
httpc.response res = con.HEAD(path, requestHeader);
|
httpc.response res = con.HEAD(path, requestHeader);
|
||||||
if (res.status.startsWith("2")) {
|
if (res.status.startsWith("2")) {
|
||||||
// success
|
// success
|
||||||
|
|
|
@ -1265,15 +1265,13 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
|
httpRemoteProxyConfig remProxyConfig = switchboard.remoteProxyConfig;
|
||||||
|
|
||||||
// a new httpc connection, combined with possible remote proxy
|
// a new httpc connection, combined with possible remote proxy
|
||||||
boolean useProxy = (remProxyConfig!=null)&&(remProxyConfig.useProxy());
|
|
||||||
|
|
||||||
// check no-proxy rule
|
// check no-proxy rule
|
||||||
if (
|
if (
|
||||||
(switchboard.remoteProxyConfig != null) &&
|
(remProxyConfig != null) &&
|
||||||
(switchboard.remoteProxyConfig.useProxy()) &&
|
(remProxyConfig.useProxy()) &&
|
||||||
(!(switchboard.remoteProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
|
(!(remProxyConfig.remoteProxyAllowProxySet.contains(server)))) {
|
||||||
if (switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
|
if (remProxyConfig.remoteProxyDisallowProxySet.contains(server)) {
|
||||||
useProxy = false;
|
remProxyConfig = null;
|
||||||
} else {
|
} else {
|
||||||
// analyse remoteProxyNoProxy;
|
// analyse remoteProxyNoProxy;
|
||||||
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
|
// set either remoteProxyAllowProxySet or remoteProxyDisallowProxySet accordingly
|
||||||
|
@ -1282,7 +1280,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
|
if (server.matches(remProxyConfig.getProxyNoProxyPatterns()[i])) {
|
||||||
// disallow proxy for this server
|
// disallow proxy for this server
|
||||||
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
|
switchboard.remoteProxyConfig.remoteProxyDisallowProxySet.add(server);
|
||||||
useProxy = false;
|
remProxyConfig = null;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
|
@ -1295,8 +1293,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
}
|
}
|
||||||
|
|
||||||
// branch to server/proxy
|
// branch to server/proxy
|
||||||
if (useProxy) {
|
return httpc.getInstance(
|
||||||
return httpc.getInstance(
|
|
||||||
server,
|
server,
|
||||||
server,
|
server,
|
||||||
port,
|
port,
|
||||||
|
@ -1304,14 +1301,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
||||||
false,
|
false,
|
||||||
remProxyConfig
|
remProxyConfig
|
||||||
);
|
);
|
||||||
}
|
|
||||||
return httpc.getInstance(
|
|
||||||
server,
|
|
||||||
server,
|
|
||||||
port,
|
|
||||||
timeout,
|
|
||||||
false
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private httpc newhttpc(String address, int timeout) throws IOException {
|
private httpc newhttpc(String address, int timeout) throws IOException {
|
||||||
|
|
|
@ -117,7 +117,7 @@ public class kelondroMSetTools {
|
||||||
k = (Long) orderMap.firstKey(); // the next smallest...
|
k = (Long) orderMap.firstKey(); // the next smallest...
|
||||||
mapA = joinResult;
|
mapA = joinResult;
|
||||||
mapB = (TreeMap) orderMap.remove(k);
|
mapB = (TreeMap) orderMap.remove(k);
|
||||||
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings);
|
joinResult = joinConstructiveByTest(mapA, mapB, concatStrings); // TODO: better with enumeration?
|
||||||
// free resources
|
// free resources
|
||||||
mapA = null;
|
mapA = null;
|
||||||
mapB = null;
|
mapB = null;
|
||||||
|
|
|
@ -196,9 +196,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
|
||||||
requestHeader.put(httpHeader.ACCEPT_ENCODING, this.acceptEncoding);
|
requestHeader.put(httpHeader.ACCEPT_ENCODING, this.acceptEncoding);
|
||||||
|
|
||||||
// open the connection
|
// open the connection
|
||||||
remote = ((this.remoteProxyConfig != null) && (this.remoteProxyConfig.useProxy()))
|
remote = httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null);
|
||||||
? httpc.getInstance(host, host, port, this.socketTimeout, ssl, this.remoteProxyConfig,"CRAWLER",null)
|
|
||||||
: httpc.getInstance(host, host, port, this.socketTimeout, ssl, "CRAWLER",null);
|
|
||||||
|
|
||||||
// specifying if content encoding is allowed
|
// specifying if content encoding is allowed
|
||||||
remote.setAllowContentEncoding((this.acceptEncoding != null && this.acceptEncoding.length() > 0));
|
remote.setAllowContentEncoding((this.acceptEncoding != null && this.acceptEncoding.length() > 0));
|
||||||
|
|
|
@ -424,7 +424,7 @@ public final class plasmaCrawlLURL {
|
||||||
URL newUrl = new URL(newUrlStr);
|
URL newUrl = new URL(newUrlStr);
|
||||||
|
|
||||||
// doing a http head request to test if the url is correct
|
// doing a http head request to test if the url is correct
|
||||||
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false);
|
theHttpc = httpc.getInstance(newUrl.getHost(), newUrl.getHost(), newUrl.getPort(), 30000, false, plasmaSwitchboard.getSwitchboard().remoteProxyConfig);
|
||||||
response res = theHttpc.HEAD(newUrl.getPath(), null);
|
response res = theHttpc.HEAD(newUrl.getPath(), null);
|
||||||
|
|
||||||
if (res.statusCode == 200) {
|
if (res.statusCode == 200) {
|
||||||
|
|
|
@ -919,7 +919,8 @@ public final class plasmaParser {
|
||||||
contentURL.getHost(),
|
contentURL.getHost(),
|
||||||
contentURL.getPort(),
|
contentURL.getPort(),
|
||||||
5000,
|
5000,
|
||||||
contentURL.getProtocol().equalsIgnoreCase("https"));
|
contentURL.getProtocol().equalsIgnoreCase("https"),
|
||||||
|
null);
|
||||||
|
|
||||||
httpc.response res = remote.GET(contentURL.getFile(), null);
|
httpc.response res = remote.GET(contentURL.getFile(), null);
|
||||||
if (res.statusCode != 200) {
|
if (res.statusCode != 200) {
|
||||||
|
|
|
@ -763,22 +763,13 @@ public final class yacySeedDB {
|
||||||
httpc remote = null;
|
httpc remote = null;
|
||||||
try {
|
try {
|
||||||
// init httpc
|
// init httpc
|
||||||
if ((sb.remoteProxyConfig == null)||(!sb.remoteProxyConfig.useProxy())) {
|
remote = httpc.getInstance(
|
||||||
remote = httpc.getInstance(
|
|
||||||
seedURL.getHost(),
|
|
||||||
seedURL.getHost(),
|
|
||||||
seedURL.getPort(),
|
|
||||||
10000,
|
|
||||||
seedURL.getProtocol().equalsIgnoreCase("https"));
|
|
||||||
} else {
|
|
||||||
remote = httpc.getInstance(
|
|
||||||
seedURL.getHost(),
|
seedURL.getHost(),
|
||||||
seedURL.getHost(),
|
seedURL.getHost(),
|
||||||
seedURL.getPort(),
|
seedURL.getPort(),
|
||||||
10000,
|
10000,
|
||||||
seedURL.getProtocol().equalsIgnoreCase("https"),
|
seedURL.getProtocol().equalsIgnoreCase("https"),
|
||||||
sb.remoteProxyConfig);
|
sb.remoteProxyConfig);
|
||||||
}
|
|
||||||
|
|
||||||
// Configure http headers
|
// Configure http headers
|
||||||
httpHeader reqHeader = new httpHeader();
|
httpHeader reqHeader = new httpHeader();
|
||||||
|
|
|
@ -514,7 +514,7 @@ public final class yacy {
|
||||||
httpHeader requestHeader = new httpHeader();
|
httpHeader requestHeader = new httpHeader();
|
||||||
requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
|
requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
|
||||||
try {
|
try {
|
||||||
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false);
|
httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false, null);
|
||||||
httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);
|
httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);
|
||||||
|
|
||||||
// read response
|
// read response
|
||||||
|
|
Loading…
Reference in New Issue
Block a user