... migrating to HttpComponents-Client-4.x ...

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6989 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
sixcooler 2010-07-22 23:08:37 +00:00
parent 572e429eff
commit b7102eff92
10 changed files with 153 additions and 67 deletions

View File

@ -37,11 +37,12 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Switchboard;
@ -102,7 +103,8 @@ public class ConfigAppearance_p {
final DigestURI u = new DigestURI(url, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

View File

@ -37,13 +37,14 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.WorkTables;
import de.anomic.data.translator;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.Switchboard;
@ -107,7 +108,8 @@ public class ConfigLanguage_p {
final DigestURI u = new DigestURI(url, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
}catch(final IOException e){
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

View File

@ -43,7 +43,7 @@ import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.data.list.ListAccumulator;
import de.anomic.data.list.XMLBlacklistImporter;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.search.SearchEventCache;
@ -51,6 +51,7 @@ import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacySeed;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
@ -150,7 +151,8 @@ public class sharedBlacklist_p {
// get List
DigestURI u = new DigestURI(downloadURLOld, null);
otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000));
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000));
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 1000));
} catch (final Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.putHTML("status_name", Hash);
@ -169,7 +171,8 @@ public class sharedBlacklist_p {
final DigestURI u = new DigestURI(downloadURL, null);
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); //get List
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); //get List
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

View File

@ -39,6 +39,7 @@ import java.util.Map;
import java.util.TreeSet;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Client;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -54,8 +55,8 @@ import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist;
import de.anomic.http.client.Client;
import de.anomic.http.server.ResponseContainer;
//import de.anomic.http.client.Client;
//import de.anomic.http.server.ResponseContainer;
public final class MetadataRepository implements Iterable<byte[]> {
@ -268,6 +269,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
}
log.logInfo("URLs vorher: " + urlIndexFile.size() + " Entries loaded during Iteratorloop: " + iteratorCount + " kaputte URLs: " + damagedURLS.size());
final Client client = new Client();
final Iterator<String> eiter2 = damagedURLS.iterator();
byte[] urlHashBytes;
while (eiter2.hasNext()) {
@ -289,24 +291,25 @@ public final class MetadataRepository implements Iterable<byte[]> {
final DigestURI newUrl = new DigestURI(newUrlStr, null);
// doing a http head request to test if the url is correct
final Client client = new Client(10000);
ResponseContainer res = null;
try {
res = client.HEAD(newUrl.toString());
} finally {
if(res != null) {
// release connection
res.closeStream();
}
}
// final Client client = new Client(10000);
// ResponseContainer res = null;
// try {
// res = client.HEAD(newUrl.toString());
// } finally {
// if(res != null) {
// // release connection
// res.closeStream();
// }
// }
if (res != null && res.getStatusCode() == 200) {
if (client.HEADResponse(newUrl.toString()) != null
&& client.getHttpResponse().getStatusLine().getStatusCode() == 200) {
entry.setCol(1, newUrl.toString().getBytes());
urlIndexFile.put(entry);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + new String(urlHashBytes) + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
} else {
remove(urlHashBytes);
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + new String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (res == null ? "null" : res.getStatusLine()));
if (log.isInfo()) log.logInfo("UrlDB-Entry with urlHash '" + new String(urlHashBytes) + "' removed\n\tURL: " + oldUrlStr + "\n\tConnection Status: " + (client.getHttpResponse() == null ? "null" : client.getHttpResponse().getStatusLine()));
}
}
} catch (final Exception e) {

View File

@ -72,6 +72,7 @@ import java.util.zip.ZipInputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Client;
import net.yacy.cora.protocol.ConnectionInfo;
import net.yacy.cora.protocol.ProxySettings;
import net.yacy.document.Condenser;
@ -134,7 +135,7 @@ import de.anomic.data.userDB;
import de.anomic.data.wiki.wikiBoard;
import de.anomic.data.wiki.wikiCode;
import de.anomic.data.wiki.wikiParser;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.client.Cache;
import de.anomic.http.server.HTTPDemon;
import de.anomic.http.server.HeaderFramework;
@ -1161,7 +1162,7 @@ public final class Switchboard extends serverSwitch {
indexingStorageProcessor.awaitShutdown(12000);
crawlStacker.close();
this.dbImportManager.close();
Client.closeAllConnections();
de.anomic.http.client.Client.closeAllConnections();
wikiDB.close();
blogDB.close();
blogCommentDB.close();
@ -1180,7 +1181,7 @@ public final class Switchboard extends serverSwitch {
UPnP.deletePortMapping();
Tray.removeTray();
try {
net.yacy.cora.protocol.Client.closeConnectionManager();
Client.closeConnectionManager();
} catch (InterruptedException e) {
Log.logException(e);
}
@ -1485,7 +1486,7 @@ public final class Switchboard extends serverSwitch {
}
// close unused connections
Client.cleanup();
de.anomic.http.client.Client.cleanup();
ConnectionInfo.cleanUp();
// do transmission of CR-files
@ -2333,6 +2334,14 @@ public final class Switchboard extends serverSwitch {
final int sc = peers.sizeConnected();
ResponseHeader header;
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
final Client client = new Client();
client.setHeader(reqHeader.entrySet());
client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));
yacyCore.log.logInfo("BOOTSTRAP: " + sc + " seeds known from previous run");
// - use the superseed to further fill up the seedDB
@ -2348,15 +2357,14 @@ public final class Switchboard extends serverSwitch {
) {
// load the seed list
try {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
url = new DigestURI(seedListFileURL, null);
final long start = System.currentTimeMillis();
header = Client.whead(url.toString(), reqHeader);
// header = Client.whead(url.toString(), reqHeader);
client.HEADResponse(url.toString());
header = new ResponseHeader(client.getHeaderHashMap());
final long loadtime = System.currentTimeMillis() - start;
// if (header == null) {
if (header == null) {
if (loadtime > getConfigLong("bootstrapLoadTimeout", 6000)) {
yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, time-out after " + loadtime + " milliseconds");
@ -2369,7 +2377,8 @@ public final class Switchboard extends serverSwitch {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
final byte[] content = Client.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000));
// final byte[] content = Client.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000));
final byte[] content = client.GETbytes(url.toString());
enu = FileUtils.strings(content);
lc = 0;
while (enu.hasNext()) {
@ -2480,11 +2489,14 @@ public final class Switchboard extends serverSwitch {
* @return
*/
public static Map<String, String> loadFileAsMap(final DigestURI url) {
try {
// sending request
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
final Map<String, String> result = FileUtils.table(Client.wget(url.toString(), reqHeader, 10000));
final Client client = new Client();
client.setHeader(reqHeader.entrySet());
try {
// sending request
// final Map<String, String> result = FileUtils.table(Client.wget(url.toString(), reqHeader, 10000));
final Map<String, String> result = FileUtils.table(client.GETbytes(url.toString()));
if (result == null) return new HashMap<String, String>();
return result;
} catch (final Exception e) {

View File

@ -23,11 +23,12 @@ package de.anomic.tools;
import java.util.Hashtable;
import net.yacy.cora.protocol.HttpConnector;
import net.yacy.cora.protocol.ProxySettings;
import net.yacy.kelondro.data.meta.DigestURI;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
@ -121,7 +122,8 @@ public class loaderThreads {
try {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
page = Client.wget(url.toString(), reqHeader, timeout);
// page = Client.wget(url.toString(), reqHeader, timeout);
page = HttpConnector.wget(url.toString(), reqHeader.entrySet(), timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {

View File

@ -1212,12 +1212,12 @@ public final class yacyClient {
} catch (IOException e1) {
Log.logException(e1);
}
}
try {
net.yacy.cora.protocol.Client.closeConnectionManager();
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
}

View File

@ -39,6 +39,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.protocol.Client;
import net.yacy.kelondro.blob.MapDataMining;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
@ -50,12 +51,12 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.http.client.Client;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HTTPDemon;
import de.anomic.http.server.AlternativeDomainNames;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.http.server.ResponseContainer;
//import de.anomic.http.server.ResponseContainer;
import de.anomic.search.Switchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverSwitch;
@ -855,24 +856,38 @@ public final class yacySeedDB implements AlternativeDomainNames {
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
// init http-client
final Client client = new Client(10000, reqHeader);
// final Client client = new Client(10000, reqHeader);
// byte[] content = null;
// ResponseContainer res = null;
// try {
// // send request
// res = client.GET(seedURL.toString());
//
// // check response code
// if (res.getStatusCode() != 200) {
// throw new IOException("Server returned status: " + res.getStatusLine());
// }
//
// // read byte array
// content = res.getData();
// } finally {
// if(res != null) {
// res.closeStream();
// }
// }
final Client client = new Client();
client.setHeader(reqHeader.entrySet());
byte[] content = null;
ResponseContainer res = null;
try {
// send request
res = client.GET(seedURL.toString());
content = client.GETbytes(seedURL.toString());
} catch (final Exception e) {
throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
}
// check response code
if (res.getStatusCode() != 200) {
throw new IOException("Server returned status: " + res.getStatusLine());
}
// read byte array
content = res.getData();
} finally {
if(res != null) {
res.closeStream();
}
if (client.getHttpResponse().getStatusLine().getStatusCode() != 200) {
throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
}
try {
@ -882,7 +897,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
// convert it into an array
return FileUtils.strings(content);
} catch (final Exception e) {
throw new IOException("Unable to download seed file '" + seedURL + "'. " + e.getMessage());
throw new IOException("Unable to uncompress seed file '" + seedURL + "'. " + e.getMessage());
}
}

View File

@ -2,6 +2,7 @@ package net.yacy.cora.protocol;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.Map.Entry;
@ -44,6 +45,7 @@ import org.apache.http.util.EntityUtils;
/**
* HttpClient implementation which uses HttpComponents Client (see <a href="http://hc.apache.org/">http://hc.apache.org/</a>)
*
* @author sixcooler
*
@ -54,7 +56,7 @@ public class Client {
private static IdledConnectionEvictor idledConnectionEvictor = null;
private static HttpClient httpClient = null;
private Header[] headers = null;
private HttpResponse httpResponse;
private HttpResponse httpResponse = null;
private long upbytes = 0L;
private int timeout = 10000;
private String userAgent = null;
@ -144,12 +146,14 @@ public class Client {
* @param entrys to be set as request header
*/
public void setHeader(final Set<Entry<String, String>> entrys) {
    // A null set is ignored: whatever headers were configured before stay in place.
    if (entrys == null) {
        return;
    }
    // Allocate the target array first, then fill it slot by slot in the
    // iteration order of the given set.
    headers = new Header[entrys.size()];
    int slot = 0;
    for (final Entry<String, String> field : entrys) {
        final String name = field.getKey();
        final String value = field.getValue();
        headers[slot] = new BasicHeader(name, value);
        slot++;
    }
}
/**
*
@ -239,6 +243,15 @@ public class Client {
return httpResponse;
}
/**
 * Collects the headers of the stored response into a map of name to value.
 * If a header name occurs more than once, the value seen last replaces
 * the earlier one.
 *
 * @return the header map, or null when no response is stored
 */
public HashMap<String, String> getHeaderHashMap() {
    if (httpResponse != null) {
        final Header[] responseHeaders = httpResponse.getAllHeaders();
        final HashMap<String, String> result = new HashMap<String, String>();
        for (int idx = 0; idx < responseHeaders.length; idx++) {
            final Header current = responseHeaders[idx];
            result.put(current.getName(), current.getValue());
        }
        return result;
    }
    // no response stored on this client
    return null;
}
private byte[] getContentBytes(HttpUriRequest httpUriRequest, long maxBytes) throws IOException {
byte[] content = null;
final HttpContext httpContext = new BasicHttpContext();

View File

@ -23,6 +23,8 @@ package net.yacy.cora.protocol;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.Map.Entry;
//import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
@ -103,6 +105,19 @@ public class HttpConnector {
return client.POSTbytes(url, post);
}
/**
 * Loads the resource addressed by the given URL.
 * The request address is the URL's normal form
 * ({@code toNormalform(false, false)}); the URL's host is handed on as the
 * second argument of the String-based overload.
 *
 * @param url address of the server
 * @param timeout in milliseconds
 * @return response body
 * @throws IOException
 */
public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
    final String address = url.toNormalform(false, false);
    final String host = url.getHost();
    return wget(address, host, timeout);
}
/**
* get data from the server named by vhost
*
@ -121,17 +136,36 @@ public class HttpConnector {
return client.GETbytes(url);
}
/**
 * Loads the resource at the given address, sending the supplied request
 * header entries. Delegates to the four-argument overload with a null
 * vhost.
 *
 * @param url address of the server
 * @param entrys of RequestHeader
 * @param timeout in milliseconds
 * @return response body
 * @throws IOException
 */
public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout) throws IOException {
    // no virtual host is named by this variant
    final String vhost = null;
    return wget(url, entrys, timeout, vhost);
}
/**
* get data from the server named by url
*
* @param url address of the server
* @param entrys of RequestHeader
* @param timeout in milliseconds
* @param vhost name of the server at address which should respond
* @return response body
* @throws IOException
*/
public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
return wget(url.toNormalform(false, false), url.getHost(), timeout);
public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout, final String vhost) throws IOException {
    // A fresh client is created for this single request.
    final Client connection = new Client();
    // Configure it in the same order as before: headers, timeout, host.
    connection.setHeader(entrys);
    connection.setTimout(timeout);
    connection.setHost(vhost);
    // Issue the GET and hand back the raw response body.
    final byte[] body = connection.GETbytes(url);
    return body;
}
// public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {