mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- moved yacybot user agent string definition to MultiProtocolURI since there are basic access mechanisms where the bot string is needed
- migrated the 'yacy' user agent to 'yacybot' in many client methods, since the 'yacy' user agent is only used for the proxy
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7199 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
fc3ee9d8a1
commit
d2fd93135c
|
@ -37,11 +37,11 @@ import java.io.PrintWriter;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
|
@ -98,7 +98,7 @@ public class ConfigAppearance_p {
|
|||
Iterator<String> it;
|
||||
try {
|
||||
final DigestURI u = new DigestURI(url, null);
|
||||
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
|
||||
} catch (final IOException e) {
|
||||
prop.put("status", "1");// unable to get URL
|
||||
prop.put("status_url", url);
|
||||
|
|
|
@ -37,11 +37,11 @@ import java.io.PrintWriter;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.data.WorkTables;
|
||||
import de.anomic.data.translator;
|
||||
import de.anomic.search.Switchboard;
|
||||
|
@ -103,7 +103,7 @@ public class ConfigLanguage_p {
|
|||
Iterator<String> it;
|
||||
try{
|
||||
final DigestURI u = new DigestURI(url, null);
|
||||
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
|
||||
}catch(final IOException e){
|
||||
prop.put("status", "1");//unable to get url
|
||||
prop.put("status_url", url);
|
||||
|
|
|
@ -36,13 +36,12 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
import net.yacy.kelondro.util.DateFormatter;
|
||||
import net.yacy.kelondro.util.MapTools;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.search.SwitchboardConstants;
|
||||
|
@ -149,7 +148,7 @@ public class Network {
|
|||
prop.put("table_my-url", seed.get(yacySeed.SEEDLIST, ""));
|
||||
|
||||
// generating the location string
|
||||
prop.putHTML("table_my-location", HTTPClient.generateLocation());
|
||||
prop.putHTML("table_my-location", MultiProtocolURI.generateLocation());
|
||||
}
|
||||
|
||||
// overall results: Network statistics
|
||||
|
@ -363,8 +362,8 @@ public class Network {
|
|||
prop.putHTML(STR_TABLE_LIST + conCount + "_fullname", seed.get(yacySeed.NAME, "deadlink"));
|
||||
userAgent = null;
|
||||
if (seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) {
|
||||
userAgent = HTTPLoader.yacyUserAgent;
|
||||
location = HTTPClient.generateLocation();
|
||||
userAgent = MultiProtocolURI.yacybotUserAgent;
|
||||
location = MultiProtocolURI.generateLocation();
|
||||
} else {
|
||||
userAgent = sb.peers.peerActions.getUserAgent(seed.getIP());
|
||||
location = parseLocationInUserAgent(userAgent);
|
||||
|
|
|
@ -39,7 +39,6 @@ import java.util.HashSet;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.data.listManager;
|
||||
import de.anomic.data.list.ListAccumulator;
|
||||
import de.anomic.data.list.XMLBlacklistImporter;
|
||||
|
@ -48,6 +47,7 @@ import de.anomic.search.Switchboard;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.document.parser.html.CharacterCoding;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
@ -143,7 +143,7 @@ public class sharedBlacklist_p {
|
|||
// get List
|
||||
DigestURI u = new DigestURI(downloadURLOld, null);
|
||||
|
||||
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
|
||||
} catch (final Exception e) {
|
||||
prop.put("status", STATUS_PEER_UNKNOWN);
|
||||
prop.putHTML("status_name", Hash);
|
||||
|
@ -160,7 +160,7 @@ public class sharedBlacklist_p {
|
|||
|
||||
try {
|
||||
final DigestURI u = new DigestURI(downloadURL, null);
|
||||
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
|
||||
} catch (final Exception e) {
|
||||
prop.put("status", STATUS_URL_PROBLEM);
|
||||
prop.putHTML("status_address",downloadURL);
|
||||
|
|
|
@ -45,8 +45,6 @@ import net.yacy.kelondro.blob.BEncodedHeap;
|
|||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
import net.yacy.kelondro.io.ByteCount;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
public class RobotsTxt {
|
||||
|
||||
private static Logger log = Logger.getLogger(RobotsTxt.class);
|
||||
|
@ -325,7 +323,7 @@ public class RobotsTxt {
|
|||
RequestHeader reqHeaders = new RequestHeader();
|
||||
|
||||
// add yacybot user agent
|
||||
reqHeaders.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
|
||||
reqHeaders.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
|
||||
// adding referer
|
||||
reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.net.MalformedURLException;
|
|||
import java.util.Date;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.ResponseHeader;
|
||||
|
@ -39,7 +40,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
|
|||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.io.ByteCountInputStream;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.crawler.retrieval.Request;
|
||||
import de.anomic.search.Segments;
|
||||
import de.anomic.search.Switchboard;
|
||||
|
@ -62,7 +62,7 @@ public class SitemapImporter extends Thread {
|
|||
public void run() {
|
||||
// download document
|
||||
final RequestHeader requestHeader = new RequestHeader();
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(5000);
|
||||
client.setHeader(requestHeader.entrySet());
|
||||
|
|
|
@ -50,8 +50,6 @@ public final class HTTPLoader {
|
|||
private static final String DEFAULT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7";
|
||||
public static final long DEFAULT_MAXFILESIZE = 1024 * 1024 * 10;
|
||||
public static final int DEFAULT_CRAWLING_RETRY_COUNT = 5;
|
||||
public static final String crawlerUserAgent = "yacybot (" + HTTPClient.getSystemOST() +") http://yacy.net/bot.html";
|
||||
public static final String yacyUserAgent = "yacy (" + HTTPClient.getSystemOST() +") yacy.net";
|
||||
|
||||
/**
|
||||
* The socket timeout that should be used
|
||||
|
@ -101,7 +99,7 @@ public final class HTTPLoader {
|
|||
|
||||
// create a request header
|
||||
final RequestHeader requestHeader = new RequestHeader();
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, crawlerUserAgent);
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
DigestURI refererURL = null;
|
||||
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
|
||||
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
|
||||
|
@ -216,7 +214,7 @@ public final class HTTPLoader {
|
|||
|
||||
// create a request header
|
||||
final RequestHeader requestHeader = new RequestHeader();
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, crawlerUserAgent);
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE);
|
||||
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
|
||||
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);
|
||||
|
|
|
@ -71,6 +71,7 @@ import java.util.logging.LogManager;
|
|||
import java.util.logging.Logger;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
|
@ -86,7 +87,6 @@ import net.yacy.kelondro.logging.Log;
|
|||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.repository.Blacklist;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.crawler.retrieval.Request;
|
||||
import de.anomic.crawler.retrieval.Response;
|
||||
//import de.anomic.http.client.Client;
|
||||
|
@ -98,6 +98,9 @@ import de.anomic.server.serverObjects;
|
|||
|
||||
public final class HTTPDProxyHandler {
|
||||
|
||||
|
||||
public static final String yacyUserAgent = "yacy (" + MultiProtocolURI.systemOST +") yacy.net";
|
||||
|
||||
// static variables
|
||||
// can only be instantiated upon first instantiation of this class object
|
||||
private static Switchboard sb = null;
|
||||
|
@ -1539,7 +1542,7 @@ public final class HTTPDProxyHandler {
|
|||
private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) {
|
||||
userAgentStr.setLength(0);
|
||||
|
||||
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyUserAgent);
|
||||
final int pos = browserUserAgent.lastIndexOf(')');
|
||||
if (pos >= 0) {
|
||||
userAgentStr
|
||||
|
|
|
@ -124,7 +124,6 @@ import de.anomic.crawler.ResultURLs;
|
|||
import de.anomic.crawler.RobotsTxt;
|
||||
import de.anomic.crawler.CrawlProfile.CacheStrategy;
|
||||
import de.anomic.crawler.retrieval.EventOrigin;
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.crawler.retrieval.Request;
|
||||
import de.anomic.crawler.retrieval.Response;
|
||||
import de.anomic.data.LibraryProvider;
|
||||
|
@ -2406,7 +2405,7 @@ public final class Switchboard extends serverSwitch {
|
|||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
|
||||
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setHeader(reqHeader.entrySet());
|
||||
client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));
|
||||
|
@ -2557,7 +2556,7 @@ public final class Switchboard extends serverSwitch {
|
|||
*/
|
||||
public static Map<String, String> loadFileAsMap(final DigestURI url) {
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setHeader(reqHeader.entrySet());
|
||||
try {
|
||||
|
|
|
@ -23,11 +23,10 @@ package de.anomic.tools;
|
|||
|
||||
import java.util.Hashtable;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.http.ProxySettings;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
public class loaderThreads {
|
||||
|
||||
// global values for loader threads
|
||||
|
@ -116,7 +115,7 @@ public class loaderThreads {
|
|||
|
||||
public void run() {
|
||||
try {
|
||||
page = url.get(HTTPLoader.crawlerUserAgent, timeout);
|
||||
page = url.get(MultiProtocolURI.yacybotUserAgent, timeout);
|
||||
loaded = true;
|
||||
process.feed(page);
|
||||
if (process.status() == loaderCore.STATUS_FAILED) {
|
||||
|
|
|
@ -83,7 +83,6 @@ import org.apache.http.entity.mime.content.StringBody;
|
|||
|
||||
import de.anomic.crawler.ResultURLs;
|
||||
import de.anomic.crawler.retrieval.EventOrigin;
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.search.ContentDomain;
|
||||
import de.anomic.search.QueryParams;
|
||||
import de.anomic.search.RankingProfile;
|
||||
|
@ -99,10 +98,10 @@ public final class yacyClient {
|
|||
|
||||
|
||||
private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
|
||||
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
|
||||
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
|
||||
}
|
||||
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
|
||||
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
|
||||
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -138,7 +137,7 @@ public final class yacyClient {
|
|||
parts.put("seed", new StringBody(mySeed.genSeedStr(salt)));
|
||||
// send request
|
||||
final long start = System.currentTimeMillis();
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
|
||||
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
|
||||
result = FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
|
@ -331,7 +330,7 @@ public final class yacyClient {
|
|||
parts.put("call", new StringBody("remotecrawl"));
|
||||
parts.put("count", new StringBody(Integer.toString(maxCount)));
|
||||
parts.put("time", new StringBody(Long.toString(maxTime)));
|
||||
final byte[] result = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
|
||||
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
|
||||
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
|
||||
if (reader == null) {
|
||||
yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
|
||||
|
@ -629,8 +628,8 @@ public final class yacyClient {
|
|||
parts.put("profile", new StringBody(crypt.simpleEncode(rankingProfile.toExternalString())));
|
||||
parts.put("constraint", new StringBody((constraint == null) ? "" : constraint.exportB64()));
|
||||
if (secondarySearchSuperviser != null) parts.put("abstracts", new StringBody("auto"));
|
||||
resultMap = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
|
||||
//resultMap = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
|
||||
resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
|
||||
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
|
||||
|
||||
// evaluate request result
|
||||
if (resultMap == null || resultMap.isEmpty()) throw new IOException("resultMap is NULL");
|
||||
|
@ -750,7 +749,7 @@ public final class yacyClient {
|
|||
parts.put("filename", new StringBody(filename));
|
||||
parts.put("filesize", new StringBody(Long.toString(filesize)));
|
||||
parts.put("can-send-protocol", new StringBody("http"));
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
|
||||
final Map<String, String> result = FileUtils.table(content);
|
||||
return result;
|
||||
} catch (final Exception e) {
|
||||
|
@ -774,7 +773,7 @@ public final class yacyClient {
|
|||
parts.put("md5", new StringBody(Digest.encodeMD5Hex(file)));
|
||||
parts.put("access", new StringBody(access));
|
||||
parts.put("filename", new ByteArrayBody(file, filename));
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
|
||||
final Map<String, String> result = FileUtils.table(content);
|
||||
return result;
|
||||
} catch (final Exception e) {
|
||||
|
@ -851,7 +850,7 @@ public final class yacyClient {
|
|||
parts.put("wordh", new StringBody(wordhashes));
|
||||
parts.put("lurlEntry", new StringBody(((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
|
||||
// send request
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
|
||||
return FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
// most probably a network time-out exception
|
||||
|
@ -1006,7 +1005,7 @@ public final class yacyClient {
|
|||
parts.put("wordc", new StringBody(Integer.toString(indexes.size())));
|
||||
parts.put("entryc", new StringBody(Integer.toString(indexcount)));
|
||||
parts.put("indexes", new StringBody(entrypost.toString()));
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final Iterator<String> v = FileUtils.strings(content);
|
||||
// this should return a list of urlhashes that are unknown
|
||||
|
||||
|
@ -1050,7 +1049,7 @@ public final class yacyClient {
|
|||
}
|
||||
try {
|
||||
parts.put("urlc", new StringBody(Integer.toString(urlc)));
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final Iterator<String> v = FileUtils.strings(content);
|
||||
|
||||
final Map<String, String> result = FileUtils.table(v);
|
||||
|
@ -1072,7 +1071,7 @@ public final class yacyClient {
|
|||
if (address == null) { address = "localhost:8080"; }
|
||||
try {
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
return FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
|
||||
|
@ -1162,7 +1161,7 @@ public final class yacyClient {
|
|||
}
|
||||
byte[] res;
|
||||
try {
|
||||
res = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(url, timeout, vhost, newpost);
|
||||
res = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(url, timeout, vhost, newpost);
|
||||
System.out.println(new String(res));
|
||||
} catch (IOException e1) {
|
||||
Log.logException(e1);
|
||||
|
|
|
@ -57,7 +57,6 @@ import net.yacy.kelondro.util.FileUtils;
|
|||
import net.yacy.kelondro.util.OS;
|
||||
|
||||
import de.anomic.crawler.CrawlProfile;
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverCore;
|
||||
import de.anomic.tools.CryptoLib;
|
||||
|
@ -283,7 +282,7 @@ public final class yacyRelease extends yacyVersion {
|
|||
File download = null;
|
||||
// setup httpClient
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
|
||||
final String name = this.getUrl().getFileName();
|
||||
byte[] signatureBytes = null;
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.util.Map;
|
|||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
|
@ -52,7 +53,6 @@ import net.yacy.kelondro.order.Base64Order;
|
|||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.kelondro.util.kelondroException;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HTTPDemon;
|
||||
import de.anomic.http.server.AlternativeDomainNames;
|
||||
|
@ -846,7 +846,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
|
|||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
|
||||
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
|
||||
// init http-client
|
||||
// final Client client = new Client(10000, reqHeader);
|
||||
|
|
|
@ -49,7 +49,8 @@ import net.yacy.cora.protocol.http.HTTPClient;
|
|||
* MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
|
||||
*
|
||||
*/
|
||||
public class MultiProtocolURI implements Serializable {
|
||||
public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolURI> {
|
||||
|
||||
|
||||
private static final long serialVersionUID = -1173233022912141884L;
|
||||
public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
|
||||
|
@ -78,10 +79,47 @@ public class MultiProtocolURI implements Serializable {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* System information used for client identification: the os.arch, os.name and os.version system properties, the Java version, and the location string from generateLocation(); each property falls back to a "no-..." placeholder when it is unset.
|
||||
*/
|
||||
public static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
|
||||
System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
|
||||
"; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();
|
||||
|
||||
public static final String yacybotUserAgent = "yacybot (" + systemOST +") http://yacy.net/bot.html"; // user agent string of the form "yacybot (<systemOST>) http://yacy.net/bot.html"
|
||||
|
||||
/**
|
||||
* Generates the location string used for client identification.
|
||||
* It is the part of the "user.timezone" system property before its first '/' (the whole value when no '/' occurs after position 0; "nowhere" when the property is unset), followed by "/" and the "user.language" system property ("dumb" when unset).
|
||||
* @return the location string in the form "&lt;timezone prefix&gt;/&lt;language&gt;"
|
||||
*/
|
||||
public static String generateLocation() {
|
||||
String loc = System.getProperty("user.timezone", "nowhere");
|
||||
final int p = loc.indexOf('/');
|
||||
if (p > 0) {
|
||||
loc = loc.substring(0, p);
|
||||
}
|
||||
loc = loc + "/" + System.getProperty("user.language", "dumb");
|
||||
return loc;
|
||||
}
|
||||
|
||||
// class variables
|
||||
protected String protocol, host, userInfo, path, quest, ref;
|
||||
protected int port;
|
||||
|
||||
/**
|
||||
* Creates an empty MultiProtocolURI (every string component null, port -1), intended as a poison pill for concurrent blocking queues.
|
||||
*/
|
||||
public MultiProtocolURI() {
|
||||
this.protocol = null;
|
||||
this.host = null;
|
||||
this.userInfo = null;
|
||||
this.path = null;
|
||||
this.quest = null;
|
||||
this.ref = null;
|
||||
this.port = -1;
|
||||
}
|
||||
|
||||
/**
 * Creates a URI for a local file by delegating to the four-argument constructor
 * with "file", an empty string, -1 and the file's absolute path.
 * NOTE(review): presumably these map to protocol, host, port and path; the
 * delegate constructor is not visible here, so confirm against its declaration.
 *
 * @param file the file whose absolute path is used
 * @throws MalformedURLException declared by this constructor; presumably raised by the delegate constructor
 */
public MultiProtocolURI(final File file) throws MalformedURLException {
|
||||
this("file", "", -1, file.getAbsolutePath());
|
||||
}
|
||||
|
@ -762,9 +800,8 @@ public class MultiProtocolURI implements Serializable {
|
|||
return this.toString().equals(other.toString());
|
||||
}
|
||||
|
||||
public int compareTo(final Object h) {
|
||||
assert (h instanceof MultiProtocolURI);
|
||||
return this.toString().compareTo(((MultiProtocolURI) h).toString());
|
||||
public int compareTo(MultiProtocolURI h) {
|
||||
return this.toString().compareTo(h.toString());
|
||||
}
|
||||
|
||||
public boolean isPOST() {
|
||||
|
@ -1112,4 +1149,5 @@ public class MultiProtocolURI implements Serializable {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -609,6 +609,28 @@ public class Domains {
|
|||
return localHostAddresses[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* generate a list of intranet InetAddresses without the loopback address 127.0.0.1
|
||||
* @return list of all intranet addresses
|
||||
*/
|
||||
public static List<InetAddress> myIntranetIPs() {
|
||||
// list all local addresses
|
||||
if (localHostAddresses.length < 2) try {Thread.sleep(1000);} catch (InterruptedException e) {}
|
||||
ArrayList<InetAddress> list = new ArrayList<InetAddress>(localHostAddresses.length);
|
||||
if (localHostAddresses.length == 0) {
|
||||
if (localHostAddress != null && isLocal(localHostAddress.getHostAddress())) {
|
||||
list.add(localHostAddress);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
for (int i = 0; i < localHostAddresses.length; i++) {
|
||||
if ((0Xff & localHostAddresses[i].getAddress()[0]) == 127) continue;
|
||||
if (!matchesList(localHostAddresses[i].getHostAddress(), localhostPatterns)) continue;
|
||||
list.add(localHostAddresses[i]);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
public static int getDomainID(final String host) {
|
||||
if (host == null || host.length() == 0) return TLD_Local_ID;
|
||||
if (isLocal(host)) return TLD_Local_ID;
|
||||
|
|
|
@ -44,6 +44,7 @@ import java.io.RandomAccessFile;
|
|||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.net.InetAddress;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.ServerSocket;
|
||||
import java.net.Socket;
|
||||
import java.net.SocketException;
|
||||
|
@ -85,7 +86,7 @@ public class FTPClient {
|
|||
private Socket ControlSocket = null;
|
||||
|
||||
// socket timeout
|
||||
private static final int ControlSocketTimeout = 10000;
|
||||
private static final int ControlSocketTimeout = 1000;
|
||||
|
||||
// data socket timeout
|
||||
private int DataSocketTimeout = 0; // in seconds (default infinite)
|
||||
|
@ -1515,13 +1516,14 @@ public class FTPClient {
|
|||
}
|
||||
|
||||
try {
|
||||
ControlSocket = new Socket(host, port);
|
||||
ControlSocket = new Socket();
|
||||
ControlSocket.setSoTimeout(getTimeout());
|
||||
ControlSocket.setKeepAlive(true);
|
||||
ControlSocket.setTcpNoDelay(true); // no accumulation until buffer is full
|
||||
ControlSocket.setSoLinger(false, getTimeout()); // !wait for all data being written on close()
|
||||
ControlSocket.setSendBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
|
||||
ControlSocket.setReceiveBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
|
||||
ControlSocket.connect(new InetSocketAddress(host, port), 1000);
|
||||
clientInput = new BufferedReader(new InputStreamReader(ControlSocket.getInputStream()));
|
||||
clientOutput = new DataOutputStream(new BufferedOutputStream(ControlSocket.getOutputStream()));
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@ import javax.net.ssl.SSLContext;
|
|||
import javax.net.ssl.TrustManager;
|
||||
import javax.net.ssl.X509TrustManager;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.ConnectionInfo;
|
||||
|
||||
import org.apache.http.Header;
|
||||
|
@ -93,7 +94,7 @@ public class HTTPClient {
|
|||
|
||||
private final static int maxcon = 20;
|
||||
private static IdledConnectionEvictor idledConnectionEvictor = null;
|
||||
private static HttpClient httpClient = null;
|
||||
private static HttpClient httpClient = initConnectionManager();
|
||||
private Header[] headers = null;
|
||||
private HttpResponse httpResponse = null;
|
||||
private HttpUriRequest currentRequest = null;
|
||||
|
@ -106,19 +107,13 @@ public class HTTPClient {
|
|||
|
||||
public HTTPClient() {
|
||||
super();
|
||||
if (httpClient == null) {
|
||||
initConnectionManager();
|
||||
}
|
||||
}
|
||||
|
||||
public static void setDefaultUserAgent(final String defaultAgent) {
|
||||
if (httpClient == null) {
|
||||
initConnectionManager();
|
||||
}
|
||||
HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent);
|
||||
}
|
||||
|
||||
private static void initConnectionManager() {
|
||||
private static HttpClient initConnectionManager() {
|
||||
// Create and initialize HTTP parameters
|
||||
final HttpParams httpParams = new BasicHttpParams();
|
||||
/**
|
||||
|
@ -141,7 +136,7 @@ public class HTTPClient {
|
|||
*/
|
||||
HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
|
||||
// UserAgent
|
||||
HttpProtocolParams.setUserAgent(httpParams, "yacy (" + systemOST +") yacy.net");
|
||||
HttpProtocolParams.setUserAgent(httpParams, MultiProtocolURI.yacybotUserAgent);
|
||||
HttpProtocolParams.setUseExpectContinue(httpParams, false); // IMPORTANT - if not set to 'false' then servers do not process the request until a time-out of 2 seconds
|
||||
/**
|
||||
* HTTP connection settings
|
||||
|
@ -175,7 +170,7 @@ public class HTTPClient {
|
|||
|
||||
idledConnectionEvictor = new IdledConnectionEvictor(clientConnectionManager);
|
||||
idledConnectionEvictor.start();
|
||||
|
||||
return httpClient;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -532,35 +527,6 @@ public class HTTPClient {
|
|||
upbytes));
|
||||
}
|
||||
|
||||
/**
|
||||
* provide system information for client identification
|
||||
*/
|
||||
private static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
|
||||
System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
|
||||
"; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();
|
||||
|
||||
/**
|
||||
* generating the location string
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static String generateLocation() {
|
||||
String loc = System.getProperty("user.timezone", "nowhere");
|
||||
final int p = loc.indexOf('/');
|
||||
if (p > 0) {
|
||||
loc = loc.substring(0, p);
|
||||
}
|
||||
loc = loc + "/" + System.getProperty("user.language", "dumb");
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the systemOST
|
||||
*/
|
||||
public static String getSystemOST() {
|
||||
return systemOST;
|
||||
}
|
||||
|
||||
private static SSLSocketFactory getSSLSocketFactory() {
|
||||
final TrustManager trustManager = new X509TrustManager() {
|
||||
public void checkClientTrusted(X509Certificate[] chain, String authType)
|
||||
|
|
|
@ -41,8 +41,6 @@ import net.yacy.cora.protocol.http.HTTPConnector;
|
|||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
import org.apache.http.entity.mime.content.StringBody;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
public class Search {
|
||||
|
||||
public static BlockingQueue<RSSMessage> search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
|
||||
|
@ -128,7 +126,7 @@ public class Search {
|
|||
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
|
||||
parts.put("verify", new StringBody(verify ? "true" : "false"));
|
||||
parts.put("resource", new StringBody(global ? "global" : "local"));
|
||||
final byte[] result = HTTPConnector.getConnector(HTTPLoader.yacyUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
|
||||
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
|
||||
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
|
||||
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
|
||||
if (reader == null) {
|
||||
|
|
|
@ -32,8 +32,6 @@ import java.util.regex.Pattern;
|
|||
|
||||
import com.ibm.icu.text.CharsetDetector;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.document.AbstractParser;
|
||||
import net.yacy.document.Document;
|
||||
|
@ -268,7 +266,7 @@ public class htmlParser extends AbstractParser implements Parser {
|
|||
MultiProtocolURI url;
|
||||
try {
|
||||
url = new MultiProtocolURI(args[0]);
|
||||
byte[] content = url.get(HTTPLoader.crawlerUserAgent, 3000);
|
||||
byte[] content = url.get(MultiProtocolURI.yacybotUserAgent, 3000);
|
||||
Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
|
||||
String title = document[0].dc_title();
|
||||
System.out.println(title);
|
||||
|
|
|
@ -201,7 +201,7 @@ public final class LoaderDispatcher {
|
|||
// create request header values and a response object because we need that
|
||||
// in case that we want to return the cached content in the next step
|
||||
final RequestHeader requestHeader = new RequestHeader();
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
|
||||
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
||||
DigestURI refererURL = null;
|
||||
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
|
||||
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
|
||||
|
|
|
@ -46,6 +46,7 @@ import java.util.concurrent.Semaphore;
|
|||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
import net.yacy.gui.YaCyApp;
|
||||
|
@ -289,7 +290,7 @@ public final class yacy {
|
|||
|
||||
// set user-agent
|
||||
final String userAgent = "yacy/" + Double.toString(version) + " (www.yacy.net; "
|
||||
+ HTTPClient.getSystemOST() + ")";
|
||||
+ MultiProtocolURI.systemOST + ")";
|
||||
HTTPClient.setDefaultUserAgent(userAgent);
|
||||
|
||||
// start main threads
|
||||
|
|
Loading…
Reference in New Issue
Block a user