mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
refactoring towards a unified loading mechanism for MultiProtocolURIs
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7065 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
caece04f26
commit
5e7081cd19
|
@ -37,13 +37,10 @@ import java.io.PrintWriter;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HeaderFramework;
|
||||
import de.anomic.http.server.RequestHeader;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
|
@ -101,10 +98,7 @@ public class ConfigAppearance_p {
|
|||
Iterator<String> it;
|
||||
try {
|
||||
final DigestURI u = new DigestURI(url, null);
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
|
||||
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
|
||||
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
} catch (final IOException e) {
|
||||
prop.put("status", "1");// unable to get URL
|
||||
prop.put("status_url", url);
|
||||
|
|
|
@ -37,15 +37,12 @@ import java.io.PrintWriter;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
import de.anomic.data.WorkTables;
|
||||
import de.anomic.data.translator;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HeaderFramework;
|
||||
import de.anomic.http.server.RequestHeader;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
|
@ -106,10 +103,7 @@ public class ConfigLanguage_p {
|
|||
Iterator<String> it;
|
||||
try{
|
||||
final DigestURI u = new DigestURI(url, null);
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
// it = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000));
|
||||
it = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
|
||||
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
}catch(final IOException e){
|
||||
prop.put("status", "1");//unable to get url
|
||||
prop.put("status_url", url);
|
||||
|
|
|
@ -35,7 +35,7 @@ import java.util.StringTokenizer;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.kelondro.order.Base64Order;
|
||||
import net.yacy.kelondro.order.Digest;
|
||||
import net.yacy.kelondro.util.DateFormatter;
|
||||
|
@ -62,7 +62,7 @@ public class SettingsAck_p {
|
|||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
// get referer for backlink
|
||||
final DigestURI referer = header.referer();
|
||||
final MultiProtocolURI referer = header.referer();
|
||||
prop.put("referer", (referer == null) ? "Settings_p.html" : referer.toNormalform(true, true));
|
||||
//if (post == null) System.out.println("POST: NULL"); else System.out.println("POST: " + post.toString());
|
||||
|
||||
|
|
|
@ -43,15 +43,12 @@ import de.anomic.crawler.retrieval.HTTPLoader;
|
|||
import de.anomic.data.listManager;
|
||||
import de.anomic.data.list.ListAccumulator;
|
||||
import de.anomic.data.list.XMLBlacklistImporter;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HeaderFramework;
|
||||
import de.anomic.http.server.RequestHeader;
|
||||
import de.anomic.search.SearchEventCache;
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.yacy.yacySeed;
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.document.parser.html.CharacterCoding;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
@ -143,16 +140,10 @@ public class sharedBlacklist_p {
|
|||
if (downloadURLOld != null) {
|
||||
// download the blacklist
|
||||
try {
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.PRAGMA,"no-cache");
|
||||
reqHeader.put(HeaderFramework.CACHE_CONTROL,"no-cache");
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
|
||||
// get List
|
||||
DigestURI u = new DigestURI(downloadURLOld, null);
|
||||
|
||||
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 1000));
|
||||
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 1000));
|
||||
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
} catch (final Exception e) {
|
||||
prop.put("status", STATUS_PEER_UNKNOWN);
|
||||
prop.putHTML("status_name", Hash);
|
||||
|
@ -169,10 +160,7 @@ public class sharedBlacklist_p {
|
|||
|
||||
try {
|
||||
final DigestURI u = new DigestURI(downloadURL, null);
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
// otherBlacklist = FileUtils.strings(Client.wget(u.toString(), reqHeader, 10000)); //get List
|
||||
otherBlacklist = FileUtils.strings(HttpConnector.wget(u.toString(), reqHeader.entrySet(), 10000));
|
||||
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
|
||||
} catch (final Exception e) {
|
||||
prop.put("status", STATUS_URL_PROBLEM);
|
||||
prop.putHTML("status_address",downloadURL);
|
||||
|
|
|
@ -127,7 +127,7 @@ public class FileLoader {
|
|||
}
|
||||
|
||||
// load the resource
|
||||
InputStream is = url.getInputStream();
|
||||
InputStream is = url.getInputStream(null, -1);
|
||||
byte[] b = FileUtils.read(is);
|
||||
is.close();
|
||||
|
||||
|
|
|
@ -146,7 +146,7 @@ public class SMBLoader {
|
|||
}
|
||||
|
||||
// load the resource
|
||||
InputStream is = url.getInputStream();
|
||||
InputStream is = url.getInputStream(null, -1);
|
||||
byte[] b = FileUtils.read(is);
|
||||
is.close();
|
||||
|
||||
|
|
|
@ -385,7 +385,7 @@ public final class HTTPDProxyHandler {
|
|||
final Request request = new Request(
|
||||
null,
|
||||
url,
|
||||
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
|
||||
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
|
||||
"",
|
||||
cachedResponseHeader.lastModified(),
|
||||
sb.crawler.defaultProxyProfile.handle(),
|
||||
|
@ -512,7 +512,7 @@ public final class HTTPDProxyHandler {
|
|||
final Request request = new Request(
|
||||
null,
|
||||
url,
|
||||
requestHeader.referer() == null ? null : requestHeader.referer().hash(),
|
||||
requestHeader.referer() == null ? null : new DigestURI(requestHeader.referer()).hash(),
|
||||
"",
|
||||
responseHeader.lastModified(),
|
||||
sb.crawler.defaultProxyProfile.handle(),
|
||||
|
@ -937,7 +937,7 @@ public final class HTTPDProxyHandler {
|
|||
|
||||
// the CONTENT_LENGTH will be added by entity and cause a ClientProtocolException if set
|
||||
final int contentLength = requestHeader.getContentLength();
|
||||
requestHeader.remove(RequestHeader.CONTENT_LENGTH);
|
||||
requestHeader.remove(HeaderFramework.CONTENT_LENGTH);
|
||||
|
||||
final HTTPClient client = setupHttpClient(requestHeader, connectHost);
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ import java.util.Map;
|
|||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.kelondro.util.DateFormatter;
|
||||
|
||||
import de.anomic.server.serverCore;
|
||||
|
@ -84,18 +84,18 @@ public class RequestHeader extends HeaderFramework {
|
|||
super(reverseMappingCache, othermap);
|
||||
}
|
||||
|
||||
public DigestURI referer() {
|
||||
public MultiProtocolURI referer() {
|
||||
String referer = get(REFERER, null);
|
||||
if (referer == null) return null;
|
||||
try {
|
||||
return new DigestURI(referer, null);
|
||||
return new MultiProtocolURI(referer);
|
||||
} catch (MalformedURLException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public String refererHost() {
|
||||
final DigestURI url = referer();
|
||||
final MultiProtocolURI url = referer();
|
||||
if (url == null) return null;
|
||||
return url.getHost();
|
||||
}
|
||||
|
|
|
@ -122,7 +122,7 @@ public class DocumentIndex extends Segment {
|
|||
if (!url.canRead()) throw new IOException("cannot read file");
|
||||
Document[] documents;
|
||||
try {
|
||||
documents = TextParser.parseSource(url, null, null, url.length(), url.getInputStream());
|
||||
documents = TextParser.parseSource(url, null, null, url.length(), url.getInputStream(null, -1));
|
||||
} catch (Exception e) {
|
||||
throw new IOException("cannot parse " + url.toString() + ": " + e.getMessage());
|
||||
}
|
||||
|
|
|
@ -281,12 +281,7 @@ public class serverSwitch {
|
|||
File ret;
|
||||
final String path = getConfig(key, dflt).replace('\\', '/');
|
||||
final File f = new File(path);
|
||||
if (f == null) {
|
||||
ret = null;
|
||||
} else {
|
||||
ret = (f.isAbsolute() ? new File(f.getAbsolutePath()) : new File(this.rootPath, path));
|
||||
}
|
||||
|
||||
ret = (f.isAbsolute() ? new File(f.getAbsolutePath()) : new File(this.rootPath, path));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,14 +23,10 @@ package de.anomic.tools;
|
|||
|
||||
import java.util.Hashtable;
|
||||
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.cora.protocol.http.ProxySettings;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HeaderFramework;
|
||||
import de.anomic.http.server.RequestHeader;
|
||||
|
||||
public class loaderThreads {
|
||||
|
||||
|
@ -120,10 +116,7 @@ public class loaderThreads {
|
|||
|
||||
public void run() {
|
||||
try {
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
|
||||
// page = Client.wget(url.toString(), reqHeader, timeout);
|
||||
page = HttpConnector.wget(url.toString(), reqHeader.entrySet(), timeout);
|
||||
page = url.get(HTTPLoader.crawlerUserAgent, timeout);
|
||||
loaded = true;
|
||||
process.feed(page);
|
||||
if (process.status() == loaderCore.STATUS_FAILED) {
|
||||
|
|
|
@ -47,7 +47,6 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
//import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
@ -57,11 +56,12 @@ import java.util.Map;
|
|||
import java.util.TreeMap;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.document.RSSFeed;
|
||||
import net.yacy.cora.document.RSSMessage;
|
||||
import net.yacy.cora.document.RSSReader;
|
||||
import net.yacy.cora.protocol.ByteArrayBody;
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.cora.protocol.http.HTTPConnector;
|
||||
import net.yacy.cora.services.Search;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.data.word.Word;
|
||||
|
@ -78,18 +78,12 @@ import net.yacy.kelondro.util.ByteBuffer;
|
|||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.repository.Blacklist;
|
||||
|
||||
//import org.apache.commons.httpclient.methods.multipart.ByteArrayPartSource;
|
||||
//import org.apache.commons.httpclient.methods.multipart.Part;
|
||||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
import org.apache.http.entity.mime.content.StringBody;
|
||||
|
||||
import de.anomic.crawler.ResultURLs;
|
||||
import de.anomic.crawler.retrieval.EventOrigin;
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
//import de.anomic.http.client.DefaultCharsetStringPart;
|
||||
//import de.anomic.http.client.Client;
|
||||
import de.anomic.http.server.HeaderFramework;
|
||||
import de.anomic.http.server.RequestHeader;
|
||||
import de.anomic.search.RankingProfile;
|
||||
import de.anomic.search.RankingProcess;
|
||||
import de.anomic.search.SearchEvent;
|
||||
|
@ -103,25 +97,11 @@ import de.anomic.tools.crypt;
|
|||
public final class yacyClient {
|
||||
|
||||
|
||||
/**
|
||||
* @see wput
|
||||
* @param target
|
||||
* @param filename
|
||||
* @param post
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
// private static byte[] postToFile(final yacySeed target, final String filename, final List<Part> post, final int timeout) throws IOException {
|
||||
// return HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", post, timeout, false);
|
||||
// }
|
||||
// private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final List<Part> post, final int timeout) throws IOException {
|
||||
// return HttpConnector.wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", post, timeout, false);
|
||||
// }
|
||||
private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
|
||||
return HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", parts, timeout);
|
||||
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
|
||||
}
|
||||
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
|
||||
return HttpConnector.wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts, timeout);
|
||||
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -150,18 +130,14 @@ public final class yacyClient {
|
|||
|
||||
Map<String, String> result = null;
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
|
||||
try {
|
||||
// generate request
|
||||
// post.add(new DefaultCharsetStringPart("count", "20"));
|
||||
// post.add(new DefaultCharsetStringPart("seed", mySeed.genSeedStr(salt)));
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
|
||||
parts.put("count", new StringBody("20"));
|
||||
parts.put("seed", new StringBody(mySeed.genSeedStr(salt)));
|
||||
// send request
|
||||
final long start = System.currentTimeMillis();
|
||||
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", post, 10000, false);
|
||||
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts, 30000);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
|
||||
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
|
||||
result = FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
|
@ -266,13 +242,9 @@ public final class yacyClient {
|
|||
public static yacySeed querySeed(final yacySeed target, final String seedHash) {
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("object", "seed"));
|
||||
// post.add(new DefaultCharsetStringPart("env", seedHash));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = postToFile(target, "query.html", post, 10000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
parts.put("object", new StringBody("seed"));
|
||||
parts.put("env", new StringBody(seedHash));
|
||||
|
@ -291,14 +263,9 @@ public final class yacyClient {
|
|||
public static int queryRWICount(final yacySeed target, final String wordHash) {
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("object", "rwicount"));
|
||||
// post.add(new DefaultCharsetStringPart("ttl", "0"));
|
||||
// post.add(new DefaultCharsetStringPart("env", wordHash));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = postToFile(target, "query.html", post, 5000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
parts.put("object", new StringBody("rwicount"));
|
||||
parts.put("ttl", new StringBody("0"));
|
||||
|
@ -319,14 +286,9 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("object", "lurlcount"));
|
||||
// post.add(new DefaultCharsetStringPart("ttl", "0"));
|
||||
// post.add(new DefaultCharsetStringPart("env", ""));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = postToFile(target, "query.html", post, 5000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
parts.put("object", new StringBody("lurlcount"));
|
||||
parts.put("ttl", new StringBody("0"));
|
||||
|
@ -356,20 +318,15 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("call", "remotecrawl"));
|
||||
// post.add(new DefaultCharsetStringPart("count", Integer.toString(maxCount)));
|
||||
// post.add(new DefaultCharsetStringPart("time", Long.toString(maxTime)));
|
||||
|
||||
// send request
|
||||
try {
|
||||
/* a long time-out is needed */
|
||||
// final byte[] result = HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, (int) maxTime);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
parts.put("call", new StringBody("remotecrawl"));
|
||||
parts.put("count", new StringBody(Integer.toString(maxCount)));
|
||||
parts.put("time", new StringBody(Long.toString(maxTime)));
|
||||
final byte[] result = HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", parts, (int) maxTime);
|
||||
final byte[] result = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
|
||||
final RSSReader reader = RSSReader.parse(result);
|
||||
if (reader == null) {
|
||||
yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
|
||||
|
@ -442,25 +399,6 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("myseed", mySeed.genSeedStr(salt)));
|
||||
// post.add(new DefaultCharsetStringPart("count", Integer.toString(Math.max(10, count))));
|
||||
// post.add(new DefaultCharsetStringPart("resource", ((global) ? "global" : "local")));
|
||||
// post.add(new DefaultCharsetStringPart("partitions", Integer.toString(partitions)));
|
||||
// post.add(new DefaultCharsetStringPart("query", wordhashes));
|
||||
// post.add(new DefaultCharsetStringPart("exclude", excludehashes));
|
||||
// post.add(new DefaultCharsetStringPart("duetime", "1000"));
|
||||
// post.add(new DefaultCharsetStringPart("urls", urlhashes));
|
||||
// post.add(new DefaultCharsetStringPart("prefer", prefer.toString()));
|
||||
// post.add(new DefaultCharsetStringPart("filter", filter.toString()));
|
||||
// post.add(new DefaultCharsetStringPart("language", language));
|
||||
// post.add(new DefaultCharsetStringPart("sitehash", sitehash));
|
||||
// post.add(new DefaultCharsetStringPart("authorhash", authorhash));
|
||||
// post.add(new DefaultCharsetStringPart("ttl", "0"));
|
||||
// post.add(new DefaultCharsetStringPart("maxdist", Integer.toString(maxDistance)));
|
||||
// post.add(new DefaultCharsetStringPart("profile", crypt.simpleEncode(rankingProfile.toExternalString())));
|
||||
// post.add(new DefaultCharsetStringPart("constraint", (constraint == null) ? "" : constraint.exportB64()));
|
||||
// if (secondarySearchSuperviser != null) post.add(new DefaultCharsetStringPart("abstracts", "auto"));
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
boolean thisIsASecondarySearch = urlhashes.length() > 0;
|
||||
assert !thisIsASecondarySearch || secondarySearchSuperviser == null;
|
||||
|
@ -489,7 +427,7 @@ public final class yacyClient {
|
|||
parts.put("constraint", new StringBody((constraint == null) ? "" : constraint.exportB64()));
|
||||
if (secondarySearchSuperviser != null)
|
||||
parts.put("abstracts", new StringBody("auto"));
|
||||
result = FileUtils.table(HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", parts, 60000));
|
||||
result = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
|
||||
} catch (final IOException e) {
|
||||
yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore);
|
||||
//yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
|
||||
|
@ -693,12 +631,9 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetHash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("process", "permission"));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 5000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
|
||||
parts.put("process", new StringBody("permission"));
|
||||
final byte[] content = postToFile(seedDB, targetHash, "message.html", parts, 5000);
|
||||
|
@ -716,19 +651,9 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetHash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("process", "post"));
|
||||
// post.add(new DefaultCharsetStringPart("myseed", seedDB.mySeed().genSeedStr(salt)));
|
||||
// post.add(new DefaultCharsetStringPart("subject", subject));
|
||||
// try {
|
||||
// post.add(new DefaultCharsetStringPart("message", new String(message, "UTF-8")));
|
||||
// } catch (final UnsupportedEncodingException e) {
|
||||
// post.add(new DefaultCharsetStringPart("message", new String(message)));
|
||||
// }
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = postToFile(seedDB, targetHash, "message.html", post, 20000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetHash, salt);
|
||||
parts.put("process", new StringBody("post"));
|
||||
parts.put("myseed", new StringBody(seedDB.mySeed().genSeedStr(salt)));
|
||||
|
@ -765,23 +690,16 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
|
||||
// post.add(new DefaultCharsetStringPart("process", "permission"));
|
||||
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
|
||||
// post.add(new DefaultCharsetStringPart("filename", filename));
|
||||
// post.add(new DefaultCharsetStringPart("filesize", Long.toString(filesize)));
|
||||
// post.add(new DefaultCharsetStringPart("can-send-protocol", "http"));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 10000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
|
||||
parts.put("process", new StringBody("permission"));
|
||||
parts.put("purpose", new StringBody("crcon"));
|
||||
parts.put("filename", new StringBody(filename));
|
||||
parts.put("filesize", new StringBody(Long.toString(filesize)));
|
||||
parts.put("can-send-protocol", new StringBody("http"));
|
||||
final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, parts, 10000);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
|
||||
final Map<String, String> result = FileUtils.table(content);
|
||||
return result;
|
||||
} catch (final Exception e) {
|
||||
|
@ -795,17 +713,9 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), null, salt);
|
||||
// post.add(new DefaultCharsetStringPart("process", "store"));
|
||||
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
|
||||
// post.add(new DefaultCharsetStringPart("filesize", Long.toString(file.length)));
|
||||
// post.add(new DefaultCharsetStringPart("md5", Digest.encodeMD5Hex(file)));
|
||||
// post.add(new DefaultCharsetStringPart("access", access));
|
||||
// post.add(new DefaultCharsetFilePart("filename", new ByteArrayPartSource(filename, file)));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 20000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), null, salt);
|
||||
parts.put("process", new StringBody("store"));
|
||||
parts.put("purpose", new StringBody("crcon"));
|
||||
|
@ -813,7 +723,7 @@ public final class yacyClient {
|
|||
parts.put("md5", new StringBody(Digest.encodeMD5Hex(file)));
|
||||
parts.put("access", new StringBody(access));
|
||||
parts.put("filename", new ByteArrayBody(file, filename));
|
||||
final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, parts, 20000);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
|
||||
final Map<String, String> result = FileUtils.table(content);
|
||||
return result;
|
||||
} catch (final Exception e) {
|
||||
|
@ -874,13 +784,6 @@ public final class yacyClient {
|
|||
|
||||
// prepare request
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
// post.add(new DefaultCharsetStringPart("process", process));
|
||||
// post.add(new DefaultCharsetStringPart("urlhash", ((entry == null) ? "" : new String(entry.hash()))));
|
||||
// post.add(new DefaultCharsetStringPart("result", result));
|
||||
// post.add(new DefaultCharsetStringPart("reason", reason));
|
||||
// post.add(new DefaultCharsetStringPart("wordh", wordhashes));
|
||||
// post.add(new DefaultCharsetStringPart("lurlEntry", ((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
|
||||
|
||||
// determining target address
|
||||
final String address = target.getClusterAddress();
|
||||
|
@ -888,7 +791,6 @@ public final class yacyClient {
|
|||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", post, 10000);
|
||||
// prepare request
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), target.hash, salt);
|
||||
parts.put("process", new StringBody(process));
|
||||
|
@ -898,7 +800,7 @@ public final class yacyClient {
|
|||
parts.put("wordh", new StringBody(wordhashes));
|
||||
parts.put("lurlEntry", new StringBody(((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
|
||||
// send request
|
||||
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", parts, 10000);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
|
||||
return FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
// most probably a network time-out exception
|
||||
|
@ -1020,13 +922,11 @@ public final class yacyClient {
|
|||
|
||||
// prepare post values
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
|
||||
// enabling gzip compression for post request body
|
||||
if (gzipBody && (targetSeed.getVersion() < yacyVersion.YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED)) {
|
||||
gzipBody = false;
|
||||
}
|
||||
// post.add(new DefaultCharsetStringPart("wordc", Integer.toString(indexes.size())));
|
||||
|
||||
int indexcount = 0;
|
||||
final StringBuilder entrypost = new StringBuilder(indexes.size() * 73);
|
||||
|
@ -1050,16 +950,12 @@ public final class yacyClient {
|
|||
result.put("unknownURL", "");
|
||||
return result;
|
||||
}
|
||||
|
||||
// post.add(new DefaultCharsetStringPart("entryc", Integer.toString(indexcount)));
|
||||
// post.add(new DefaultCharsetStringPart("indexes", entrypost.toString()));
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
parts.put("wordc", new StringBody(Integer.toString(indexes.size())));
|
||||
parts.put("entryc", new StringBody(Integer.toString(indexcount)));
|
||||
parts.put("indexes", new StringBody(entrypost.toString()));
|
||||
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", parts, timeout);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final Iterator<String> v = FileUtils.strings(content);
|
||||
// this should return a list of urlhashes that are unknown
|
||||
|
||||
|
@ -1080,7 +976,6 @@ public final class yacyClient {
|
|||
|
||||
// prepare post values
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
|
||||
// enabling gzip compression for post request body
|
||||
|
@ -1096,18 +991,15 @@ public final class yacyClient {
|
|||
resource = urls[i].toString();
|
||||
//System.out.println("*** DEBUG resource = " + resource);
|
||||
if (resource != null && resource.indexOf(0) == -1) {
|
||||
// post.add(new DefaultCharsetStringPart("url" + urlc, resource));
|
||||
parts.put("url" + urlc, new StringBody(resource));
|
||||
urlPayloadSize += resource.length();
|
||||
urlc++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// post.add(new DefaultCharsetStringPart("urlc", Integer.toString(urlc)));
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
|
||||
parts.put("urlc", new StringBody(Integer.toString(urlc)));
|
||||
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", parts, timeout);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
final Iterator<String> v = FileUtils.strings(content);
|
||||
|
||||
final Map<String, String> result = FileUtils.table(v);
|
||||
|
@ -1124,14 +1016,12 @@ public final class yacyClient {
|
|||
|
||||
// this post a message to the remote message board
|
||||
final String salt = crypt.randomSalt();
|
||||
// final List<Part> post = yacyNetwork.basicRequestPost(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
|
||||
String address = targetSeed.getClusterAddress();
|
||||
if (address == null) { address = "localhost:8080"; }
|
||||
try {
|
||||
// final byte[] content = HttpConnector.wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", post, 5000);
|
||||
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
|
||||
final byte[] content = HttpConnector.wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", parts, 5000);
|
||||
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
|
||||
return FileUtils.table(content);
|
||||
} catch (final Exception e) {
|
||||
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
|
||||
|
@ -1151,27 +1041,14 @@ public final class yacyClient {
|
|||
final byte[] wordhashe = Word.word2hash("test");
|
||||
//System.out.println("permission=" + permissionMessage(args[1]));
|
||||
|
||||
final RequestHeader reqHeader = new RequestHeader();
|
||||
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
|
||||
// final byte[] content = Client.wget(
|
||||
// "http://" + target.getPublicAddress() + "/yacy/search.html" +
|
||||
// "?myseed=" + sb.peers.mySeed().genSeedStr(null) +
|
||||
// "&youare=" + target.hash + "&key=" +
|
||||
// "&myseed=" + sb.peers.mySeed() .genSeedStr(null) +
|
||||
// "&count=10" +
|
||||
// "&resource=global" +
|
||||
// "&query=" + new String(wordhashe) +
|
||||
// "&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT),
|
||||
// reqHeader, 10000, target.getHexHash() + ".yacyh");
|
||||
final byte[] content = HttpConnector.wget("http://" + target.getPublicAddress() + "/yacy/search.html" +
|
||||
final byte[] content = new MultiProtocolURI("http://" + target.getPublicAddress() + "/yacy/search.html" +
|
||||
"?myseed=" + sb.peers.mySeed().genSeedStr(null) +
|
||||
"&youare=" + target.hash + "&key=" +
|
||||
"&myseed=" + sb.peers.mySeed() .genSeedStr(null) +
|
||||
"&count=10" +
|
||||
"&resource=global" +
|
||||
"&query=" + new String(wordhashe) +
|
||||
"&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT),
|
||||
reqHeader.entrySet(), 10000, target.getHexHash() + ".yacyh");
|
||||
"&network.unit.name=" + Switchboard.getSwitchboard().getConfig(SwitchboardConstants.NETWORK_NAME, yacySeed.DFLT_NETWORK_UNIT)).get(HTTPLoader.crawlerUserAgent, 10000);
|
||||
final Map<String, String> result = FileUtils.table(content);
|
||||
System.out.println("Result=" + result.toString());
|
||||
} catch (final Exception e) {
|
||||
|
@ -1181,9 +1058,9 @@ public final class yacyClient {
|
|||
} else if(args.length == 1) {
|
||||
System.out.println("wput Test");
|
||||
// connection params
|
||||
URL url = null;
|
||||
MultiProtocolURI url = null;
|
||||
try {
|
||||
url = new URL(args[0]);
|
||||
url = new MultiProtocolURI(args[0]);
|
||||
} catch (final MalformedURLException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
|
@ -1193,19 +1070,6 @@ public final class yacyClient {
|
|||
}
|
||||
final String vhost = url.getHost();
|
||||
final int timeout = 10000;
|
||||
// final boolean gzipBody = false;
|
||||
// // data
|
||||
// final List<Part> post = new ArrayList<Part>();
|
||||
// post.add(new DefaultCharsetStringPart("process", "permission"));
|
||||
// post.add(new DefaultCharsetStringPart("purpose", "crcon"));
|
||||
// //post.add(new FilePart("filename", new ByteArrayPartSource(filename, file)));
|
||||
// // do it!
|
||||
// try {
|
||||
// final byte[] response = HttpConnector.wput(url.toString(), vhost, post, timeout, gzipBody);
|
||||
// System.out.println(new String(response));
|
||||
// } catch (final IOException e) {
|
||||
// Log.logException(e);
|
||||
// }
|
||||
// new data
|
||||
final LinkedHashMap<String,ContentBody> newpost = new LinkedHashMap<String,ContentBody>();
|
||||
try {
|
||||
|
@ -1216,7 +1080,7 @@ public final class yacyClient {
|
|||
}
|
||||
byte[] res;
|
||||
try {
|
||||
res = HttpConnector.wput(url.toString(), vhost, newpost, timeout);
|
||||
res = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(url, timeout, vhost, newpost);
|
||||
System.out.println(new String(res));
|
||||
} catch (IOException e1) {
|
||||
Log.logException(e1);
|
||||
|
|
|
@ -21,6 +21,8 @@
|
|||
|
||||
package net.yacy.cora.document;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
|
@ -39,6 +41,7 @@ import jcifs.smb.SmbFile;
|
|||
import jcifs.smb.SmbFileInputStream;
|
||||
|
||||
import net.yacy.cora.document.Punycode.PunycodeException;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
import net.yacy.kelondro.util.Domains;
|
||||
|
||||
/**
|
||||
|
@ -934,12 +937,45 @@ public class MultiProtocolURI implements Serializable {
|
|||
return null;
|
||||
}
|
||||
|
||||
public InputStream getInputStream() throws IOException {
|
||||
public InputStream getInputStream(final String userAgent, final int timeout) throws IOException {
|
||||
if (isFile()) return new FileInputStream(getFSFile());
|
||||
if (isSMB()) return new SmbFileInputStream(getSmbFile());
|
||||
if (isHTTP() || isHTTPS()) {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(userAgent);
|
||||
client.setHost(this.getHost());
|
||||
return new ByteArrayInputStream(client.GETbytes(this.toNormalform(false, false)));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public byte[] get(final String userAgent, final int timeout) throws IOException {
|
||||
if (isFile()) return read(new FileInputStream(getFSFile()));
|
||||
if (isSMB()) return read(new SmbFileInputStream(getSmbFile()));
|
||||
if (isHTTP() || isHTTPS()) {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(userAgent);
|
||||
client.setHost(this.getHost());
|
||||
return client.GETbytes(this.toNormalform(false, false));
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static byte[] read(final InputStream source) throws IOException {
|
||||
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
final byte[] buffer = new byte[2048];
|
||||
int c;
|
||||
while ((c = source.read(buffer, 0, 2048)) > 0) baos.write(buffer, 0, c);
|
||||
baos.flush();
|
||||
baos.close();
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
|
||||
//---------------------
|
||||
|
||||
private static final String splitrex = " |/|\\(|\\)|-|\\:|_|\\.|,|\\?|!|'|" + '"';
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
/**
|
||||
* HttpConnector
|
||||
* Copyright 2010 by Michael Peter Christen
|
||||
* First released 25.05.2010 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.protocol;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Set;
|
||||
import java.util.Map.Entry;
|
||||
//import java.util.List;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
|
||||
//import org.apache.commons.httpclient.methods.multipart.Part;
|
||||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
// import de.anomic.http.client.Client;
|
||||
//import de.anomic.http.server.HeaderFramework;
|
||||
//import de.anomic.http.server.RequestHeader;
|
||||
//import de.anomic.http.server.ResponseContainer;
|
||||
|
||||
public class HttpConnector {
|
||||
|
||||
// /**
|
||||
// * send data to the server named by vhost
|
||||
// *
|
||||
// * @param address address of the server
|
||||
// * @param vhost name of the server at address which should respond
|
||||
// * @param post data to send (name-value-pairs)
|
||||
// * @param timeout in milliseconds
|
||||
// * @return response body
|
||||
// * @throws IOException
|
||||
// */
|
||||
// public static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout) throws IOException {
|
||||
// return wput(url, vhost, post, timeout, false);
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * send data to the server named by vhost
|
||||
// *
|
||||
// * @param address address of the server
|
||||
// * @param vhost name of the server at address which should respond
|
||||
// * @param post data to send (name-value-pairs)
|
||||
// * @param timeout in milliseconds
|
||||
// * @param gzipBody send with content gzip encoded
|
||||
// * @return response body
|
||||
// * @throws IOException
|
||||
// */
|
||||
// public static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout, final boolean gzipBody) throws IOException {
|
||||
// final RequestHeader header = new RequestHeader();
|
||||
// header.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
// header.put(HeaderFramework.HOST, vhost);
|
||||
// final de.anomic.http.client.Client client = new de.anomic.http.client.Client(timeout, header);
|
||||
//
|
||||
// ResponseContainer res = null;
|
||||
// byte[] content = null;
|
||||
// try {
|
||||
// // send request/data
|
||||
// res = client.POST(url, post, gzipBody);
|
||||
// content = res.getData();
|
||||
// } finally {
|
||||
// if(res != null) {
|
||||
// // release connection
|
||||
// res.closeStream();
|
||||
// }
|
||||
// }
|
||||
// return content;
|
||||
// }
|
||||
|
||||
/**
|
||||
* send data to the server named by vhost
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param vhost name of the server at address which should respond
|
||||
* @param post data to send (name-value-pairs)
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] wput(final String url, final String vhost, LinkedHashMap<String,ContentBody> post, final int timeout) throws IOException {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(HTTPLoader.yacyUserAgent);
|
||||
client.setHost(vhost);
|
||||
|
||||
return client.POSTbytes(url, post);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get data from the server named by url
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] wget(final MultiProtocolURI url, final int timeout) throws IOException {
|
||||
return wget(url.toNormalform(false, false), url.getHost(), timeout);
|
||||
}
|
||||
|
||||
/**
|
||||
* get data from the server named by vhost
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param vhost name of the server at address which should respond
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(HTTPLoader.yacyUserAgent);
|
||||
client.setHost(vhost);
|
||||
|
||||
return client.GETbytes(url);
|
||||
}
|
||||
|
||||
/**
|
||||
* get data from the server named by vhost
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param entrys of RequestHeader
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout) throws IOException {
|
||||
return wget(url, entrys, timeout, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* get data from the server named by url
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param entrys of RequestHeader
|
||||
* @param timeout in milliseconds
|
||||
* @param vhost name of the server at address which should respond
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public static byte[] wget(final String url, final Set<Entry<String, String>> entrys, final int timeout, final String vhost) throws IOException {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setHeader(entrys);
|
||||
client.setTimout(timeout);
|
||||
client.setHost(vhost);
|
||||
|
||||
return client.GETbytes(url);
|
||||
}
|
||||
|
||||
// public static byte[] wget(final String url, final String vhost, final int timeout) throws IOException {
|
||||
// final RequestHeader header = new RequestHeader();
|
||||
// header.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
|
||||
// header.put(HeaderFramework.HOST, vhost);
|
||||
// final de.anomic.http.client.Client client = new de.anomic.http.client.Client(timeout, header);
|
||||
//
|
||||
// ResponseContainer res = null;
|
||||
// byte[] content = null;
|
||||
// try {
|
||||
// // send request/data
|
||||
// res = client.GET(url);
|
||||
// content = res.getData();
|
||||
// } finally {
|
||||
// if(res != null) {
|
||||
// // release connection
|
||||
// res.closeStream();
|
||||
// }
|
||||
// }
|
||||
// return content;
|
||||
// }
|
||||
|
||||
}
|
|
@ -2048,14 +2048,14 @@ public class FTPClient {
|
|||
|
||||
// save ip address in high byte order
|
||||
// byte[] Bytes = LocalIp.getAddress();
|
||||
final byte[] Bytes = Domains.myPublicLocalIP().getHostAddress().getBytes();
|
||||
final byte[] b = Domains.myPublicLocalIP().getHostAddress().getBytes();
|
||||
|
||||
// bytes greater than 127 should not be printed as negative
|
||||
final short[] Shorts = new short[4];
|
||||
final short[] s = new short[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
Shorts[i] = Bytes[i];
|
||||
if (Shorts[i] < 0) {
|
||||
Shorts[i] += 256;
|
||||
s[i] = b[i];
|
||||
if (s[i] < 0) {
|
||||
s[i] += 256;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2064,7 +2064,7 @@ public class FTPClient {
|
|||
send("PORT "
|
||||
+
|
||||
// "127,0,0,1," +
|
||||
Shorts[0] + "," + Shorts[1] + "," + Shorts[2] + "," + Shorts[3] + "," + ((DataPort & 0xff00) >> 8)
|
||||
s[0] + "," + s[1] + "," + s[2] + "," + s[3] + "," + ((DataPort & 0xff00) >> 8)
|
||||
+ "," + (DataPort & 0x00ff));
|
||||
|
||||
// read status of the command from the control port
|
||||
|
|
|
@ -329,7 +329,7 @@ public class HTTPClient {
|
|||
* @return content bytes
|
||||
* @throws IOException
|
||||
*/
|
||||
public byte[] POSTbytes(final String uri, final LinkedHashMap<String,ContentBody> parts) throws IOException {
|
||||
public byte[] POSTbytes(final String uri, final LinkedHashMap<String, ContentBody> parts) throws IOException {
|
||||
final HttpPost httpPost = new HttpPost(uri);
|
||||
|
||||
final MultipartEntity multipartEntity = new MultipartEntity();
|
||||
|
|
71
source/net/yacy/cora/protocol/http/HTTPConnector.java
Normal file
71
source/net/yacy/cora/protocol/http/HTTPConnector.java
Normal file
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
* HttpConnector
|
||||
* Copyright 2010 by Michael Peter Christen
|
||||
* First released 25.05.2010 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.protocol.http;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
|
||||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
|
||||
/**
|
||||
* This Connector is a convenience class to access the protocol-specific http client class.
|
||||
*/
|
||||
public class HTTPConnector {
|
||||
|
||||
private static final Map<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();
|
||||
private String userAgent;
|
||||
|
||||
private HTTPConnector(String userAgent) {
|
||||
this.userAgent = userAgent;
|
||||
}
|
||||
|
||||
public static final HTTPConnector getConnector(String userAgent) {
|
||||
HTTPConnector c = cons.get(userAgent);
|
||||
if (c != null) return c;
|
||||
c = new HTTPConnector(userAgent);
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* send data to the server named by vhost
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param vhost name of the server at address which should respond
|
||||
* @param post data to send (name-value-pairs)
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post) throws IOException {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(this.userAgent);
|
||||
client.setHost(vhost);
|
||||
|
||||
return client.POSTbytes(url.toNormalform(false, false), post);
|
||||
}
|
||||
|
||||
}
|
71
source/net/yacy/cora/protocol/http/HttpConnector.java
Normal file
71
source/net/yacy/cora/protocol/http/HttpConnector.java
Normal file
|
@ -0,0 +1,71 @@
|
|||
/**
|
||||
* HttpConnector
|
||||
* Copyright 2010 by Michael Peter Christen
|
||||
* First released 25.05.2010 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.cora.protocol.http;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
|
||||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
|
||||
/**
|
||||
* This Connector is a convenience class to access the protocol-specific http client class.
|
||||
*/
|
||||
public class HTTPConnector {
|
||||
|
||||
private static final Map<String, HTTPConnector> cons = new ConcurrentHashMap<String, HTTPConnector>();
|
||||
private String userAgent;
|
||||
|
||||
private HTTPConnector(String userAgent) {
|
||||
this.userAgent = userAgent;
|
||||
}
|
||||
|
||||
public static final HTTPConnector getConnector(String userAgent) {
|
||||
HTTPConnector c = cons.get(userAgent);
|
||||
if (c != null) return c;
|
||||
c = new HTTPConnector(userAgent);
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* send data to the server named by vhost
|
||||
*
|
||||
* @param url address of the server
|
||||
* @param vhost name of the server at address which should respond
|
||||
* @param post data to send (name-value-pairs)
|
||||
* @param timeout in milliseconds
|
||||
* @return response body
|
||||
* @throws IOException
|
||||
*/
|
||||
public byte[] post(final MultiProtocolURI url, final int timeout, final String vhost, LinkedHashMap<String, ContentBody> post) throws IOException {
|
||||
final HTTPClient client = new HTTPClient();
|
||||
client.setTimout(timeout);
|
||||
client.setUserAgent(this.userAgent);
|
||||
client.setHost(vhost);
|
||||
|
||||
return client.POSTbytes(url.toNormalform(false, false), post);
|
||||
}
|
||||
|
||||
}
|
|
@ -34,13 +34,15 @@ import net.yacy.cora.document.MultiProtocolURI;
|
|||
import net.yacy.cora.document.RSSFeed;
|
||||
import net.yacy.cora.document.RSSMessage;
|
||||
import net.yacy.cora.document.RSSReader;
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.cora.protocol.http.HTTPConnector;
|
||||
|
||||
//import org.apache.commons.httpclient.methods.multipart.Part;
|
||||
//import org.apache.commons.httpclient.methods.multipart.StringPart;
|
||||
import org.apache.http.entity.mime.content.ContentBody;
|
||||
import org.apache.http.entity.mime.content.StringBody;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
public class Search {
|
||||
|
||||
public static BlockingQueue<RSSMessage> search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
|
||||
|
@ -118,24 +120,15 @@ public class Search {
|
|||
throw new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage());
|
||||
}
|
||||
|
||||
// prepare request
|
||||
// final List<Part> post = new ArrayList<Part>();
|
||||
// post.add(new StringPart("query", query, Charset.defaultCharset().name()));
|
||||
// post.add(new StringPart("startRecord", Integer.toString(startRecord), Charset.defaultCharset().name()));
|
||||
// post.add(new StringPart("maximumRecords", Long.toString(maximumRecords), Charset.defaultCharset().name()));
|
||||
// post.add(new StringPart("verify", verify ? "true" : "false", Charset.defaultCharset().name()));
|
||||
// post.add(new StringPart("resource", global ? "global" : "local", Charset.defaultCharset().name()));
|
||||
|
||||
// send request
|
||||
try {
|
||||
// final byte[] result = HttpConnector.wput(rssSearchServiceURL, uri.getHost(), post, (int) timeout);
|
||||
final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
|
||||
parts.put("query", new StringBody(query));
|
||||
parts.put("startRecord", new StringBody(Integer.toString(startRecord)));
|
||||
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
|
||||
parts.put("verify", new StringBody(verify ? "true" : "false"));
|
||||
parts.put("resource", new StringBody(global ? "global" : "local"));
|
||||
final byte[] result = HttpConnector.wput(rssSearchServiceURL, uri.getHost(), parts, (int) timeout);
|
||||
final byte[] result = HTTPConnector.getConnector(HTTPLoader.yacyUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
|
||||
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
|
||||
final RSSReader reader = RSSReader.parse(result);
|
||||
if (reader == null) {
|
||||
|
|
|
@ -28,8 +28,9 @@ import java.nio.charset.Charset;
|
|||
import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
|
||||
import de.anomic.crawler.retrieval.HTTPLoader;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.HttpConnector;
|
||||
import net.yacy.document.AbstractParser;
|
||||
import net.yacy.document.Document;
|
||||
import net.yacy.document.Parser;
|
||||
|
@ -233,7 +234,7 @@ public class htmlParser extends AbstractParser implements Parser {
|
|||
MultiProtocolURI url;
|
||||
try {
|
||||
url = new MultiProtocolURI(args[0]);
|
||||
byte[] content = HttpConnector.wget(url, 3000);
|
||||
byte[] content = url.get(HTTPLoader.crawlerUserAgent, 3000);
|
||||
Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
|
||||
String title = document[0].dc_title();
|
||||
System.out.println(title);
|
||||
|
|
|
@ -555,7 +555,6 @@ public class Domains {
|
|||
}
|
||||
|
||||
public static InetAddress myPublicLocalIP() {
|
||||
new localHostAddressLookup().start();
|
||||
// list all addresses
|
||||
// for (int i = 0; i < localHostAddresses.length; i++) System.out.println("IP: " + localHostAddresses[i].getHostAddress()); // DEBUG
|
||||
if (localHostAddresses.length == 0) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user