refactoring of wget string list generation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2692 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-10-02 09:59:20 +00:00
parent dbc2e039bb
commit 5a40ea7866
12 changed files with 51 additions and 37 deletions

View File

@ -62,6 +62,7 @@ import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
public class ConfigLanguage_p {
@ -97,7 +98,7 @@ public class ConfigLanguage_p {
ArrayList langVector;
try{
URL u = new URL(url);
langVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig);
langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig));
}catch(IOException e){
prop.put("status", 1);//unable to get url
prop.put("status_url", url);

View File

@ -61,6 +61,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
public class ConfigSkins_p {
@ -126,7 +127,7 @@ public class ConfigSkins_p {
ArrayList skinVector;
try{
URL u = new URL(url);
skinVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig);
skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig));
}catch(IOException e){
prop.put("status", 1);//unable to get URL
prop.put("status_url", url);

View File

@ -62,6 +62,7 @@ import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -130,7 +131,7 @@ public class sharedBlacklist_p {
// get List
URL u = new URL(downloadURL);
otherBlacklist = httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader);
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader));
} catch (Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.put("page", 1);
@ -146,7 +147,7 @@ public class sharedBlacklist_p {
try {
URL u = new URL(downloadURL);
otherBlacklist = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig); //get List
otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig)); //get List
} catch (Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.put("status_address",downloadURL);

View File

@ -56,6 +56,7 @@ import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
public class getpageinfo_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
@ -77,7 +78,7 @@ public class getpageinfo_p {
}
if (actions.indexOf("title")>=0) {
try {
content = httpc.wget(new URL(url));
content = nxTools.strings(httpc.wget(new URL(url)));
Iterator it = content.iterator();
String line;

View File

@ -91,6 +91,7 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;
/**
* This class implements an http client. While http access is built-in in java
@ -1280,7 +1281,7 @@ do upload
}
}
public static ArrayList wget(
public static byte[] wget(
URL url,
String vhost,
int timeout,
@ -1291,11 +1292,11 @@ do upload
return wget(url, vhost,timeout,user,password,theRemoteProxyConfig,null);
}
public static ArrayList wget(URL url) throws IOException{
public static byte[] wget(URL url) throws IOException{
return wget(url, url.getHost(), 6000, null, null, plasmaSwitchboard.getSwitchboard().remoteProxyConfig, null);
}
public static ArrayList wget(
public static byte[] wget(
URL url,
String vhost,
int timeout,
@ -1352,15 +1353,7 @@ do upload
}
}
int s = 0;
int e;
ArrayList v = new ArrayList();
while (s < a.length) {
e = s; while (e < a.length) if (a[e++] < 32) {e--; break;}
v.add(new String(a, s, e - s));
s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;}
}
return v;
return a;
}
public static httpHeader whead(
@ -1466,7 +1459,7 @@ do upload
httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort);
try {
URL u = new URL(url);
text = wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig);
text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig));
} catch (MalformedURLException e) {
System.out.println("The url '" + url + "' is wrong.");
} catch (IOException e) {

View File

@ -63,7 +63,7 @@ public class natLib {
rm status.htm
*/
try {
ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null));
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
@ -75,7 +75,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null));
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@ -86,7 +86,7 @@ public class natLib {
private static String getStanford() {
try {
ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null));
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
@ -97,7 +97,7 @@ public class natLib {
private static String getIPID() {
try {
ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null);
ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null));
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);

View File

@ -41,7 +41,6 @@
package de.anomic.tools;
import java.util.ArrayList;
import java.util.Properties;
public abstract class loaderCore implements loaderProcess {
@ -63,7 +62,7 @@ public abstract class loaderCore implements loaderProcess {
protected int completion = 0;
// steering methods
public abstract void feed(ArrayList v); // returns true if process was successful; should be always synchronized
public abstract void feed(byte[] a); // returns true if process was successful; should be always synchronized
public void terminate() {
// if terminated before completion, completed() shows x < 100

View File

@ -41,13 +41,12 @@
package de.anomic.tools;
import java.util.ArrayList;
import java.util.Properties;
public interface loaderProcess {
// steering methods
public void feed(ArrayList v); // returns true if process was successful; should be always synchronized
public void feed(byte[] v); // returns true if process was successful; should be always synchronized
public void terminate(); // if terminated before completion, completed() shows x < 100
// feed-back methods

View File

@ -133,7 +133,7 @@ public class loaderThreads {
private URL url;
private Exception error;
private loaderProcess process;
private ArrayList page;
private byte[] page;
private boolean loaded;
public loaderThread(URL url, loaderProcess process) {
@ -193,16 +193,17 @@ public class loaderThreads {
this.status = STATUS_READY;
}
public synchronized void feed(ArrayList v) {
public synchronized void feed(byte[] v) {
this.status = STATUS_RUNNING;
this.completion = 1;
int line = 0;
String s, key, value;
int p;
ArrayList lines = nxTools.strings(v);
try {
while ((this.run) && (line < v.size())) {
while ((this.run) && (line < lines.size())) {
// parse line and construct a property
s = (String) v.get(line);
s = (String) lines.get(line);
if ((s != null) && ((p = s.indexOf('=')) > 0)) {
key = s.substring(0, p).trim();
value = s.substring(p + 1).trim();
@ -210,9 +211,9 @@ public class loaderThreads {
}
// update thread information
line++;
this.completion = 100 * line / v.size();
this.completion = 100 * line / lines.size();
}
if (line == v.size()) {
if (line == lines.size()) {
this.status = STATUS_COMPLETED;
return;
} else {

View File

@ -66,6 +66,10 @@ public class nxTools {
return props;
}
/**
 * Convenience overload of {@link #table(ArrayList)} for raw byte content.
 * The bytes are first turned into a list of strings via
 * {@link #strings(byte[])}; that list is then handed to the list-based
 * variant, whose result is returned unchanged.
 *
 * @param a raw content to be parsed
 * @return the map produced by {@link #table(ArrayList)} for the split content
 */
public static HashMap table(byte[] a) {
    final ArrayList lines = strings(a);
    return table(lines);
}
public static HashMap table(ArrayList list) {
Iterator i = list.iterator();
int pos;
@ -78,7 +82,7 @@ public class nxTools {
if (pos > 0) props.put(line.substring(0, pos).trim(), line.substring(pos + 1).trim());
}
return props;
}
}
public static Vector grep(Vector list, int afterContext, String pattern) {
Enumeration i = list.elements();
@ -144,6 +148,18 @@ public class nxTools {
return null;
}
/**
 * Splits raw byte content into a list of strings, using every run of
 * control bytes (unsigned value below 32, e.g. CR, LF, TAB) as separator.
 * <p>
 * Bytes are compared by their unsigned value. Java bytes are signed, so a
 * plain <code>b &lt; 32</code> test would also match every byte in the range
 * 0x80..0xFF and would cut non-ASCII text (ISO-8859-1 umlauts, UTF-8
 * multi-byte sequences) into pieces while dropping those bytes.
 * <p>
 * If the content starts with a control byte, the first list entry is an
 * empty string. Each piece is decoded with the platform default charset.
 *
 * @param a the raw content; must not be <code>null</code>, since
 *          <code>a.length</code> is read unconditionally
 * @return a list of <code>String</code> objects in order of appearance;
 *         empty if <code>a</code> has length 0
 */
public static ArrayList strings(byte[] a) {
    ArrayList v = new ArrayList();
    int s = 0;
    while (s < a.length) {
        // advance e to the first control byte (or the end of the content)
        int e = s;
        while ((e < a.length) && ((a[e] & 0xFF) >= 32)) e++;
        v.add(new String(a, s, e - s));
        // skip the whole run of control bytes that separates two strings
        s = e;
        while ((s < a.length) && ((a[s] & 0xFF) < 32)) s++;
    }
    return v;
}
/**
* This function shorten URL Strings<br>
*

View File

@ -61,6 +61,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.tools.disorderSet;
import de.anomic.tools.nxTools;
public class yacyPeerActions {
@ -200,7 +201,7 @@ public class yacyPeerActions {
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
seedList = httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader);
seedList = nxTools.strings(httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader));
enu = seedList.iterator();
lc = 0;
while (enu.hasNext()) {
@ -254,7 +255,7 @@ public class yacyPeerActions {
// read in remote file from url
try {
URL u = new URL(url);
ArrayList remote = httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig);
ArrayList remote = nxTools.strings(httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig));
if ((remote != null) && (remote.size() > 0)) {
Iterator e = remote.iterator();
while (e.hasNext()) {

View File

@ -71,6 +71,7 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.nxTools;
public final class yacySeedDB {
@ -713,7 +714,7 @@ public final class yacySeedDB {
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.PRAGMA, "no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
ArrayList check = httpc.wget(
ArrayList check = nxTools.strings(httpc.wget(
seedURL,
seedURL.getHost(),
10000,
@ -721,7 +722,7 @@ public final class yacySeedDB {
null,
sb.remoteProxyConfig,
reqHeader
);
));
if (check == null) {
serverLog.logFine("YACY","SaveSeedList: Testing download failed ...");