From 5a40ea7866fbea619416e22189606894c9b45452 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 2 Oct 2006 09:59:20 +0000 Subject: [PATCH] refactoring of wget string list generation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2692 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ConfigLanguage_p.java | 3 ++- htroot/ConfigSkins_p.java | 3 ++- htroot/sharedBlacklist_p.java | 5 +++-- htroot/xml/util/getpageinfo_p.java | 3 ++- source/de/anomic/http/httpc.java | 19 ++++++------------- source/de/anomic/net/natLib.java | 8 ++++---- source/de/anomic/tools/loaderCore.java | 3 +-- source/de/anomic/tools/loaderProcess.java | 3 +-- source/de/anomic/tools/loaderThreads.java | 13 +++++++------ source/de/anomic/tools/nxTools.java | 18 +++++++++++++++++- source/de/anomic/yacy/yacyPeerActions.java | 5 +++-- source/de/anomic/yacy/yacySeedDB.java | 5 +++-- 12 files changed, 51 insertions(+), 37 deletions(-) diff --git a/htroot/ConfigLanguage_p.java b/htroot/ConfigLanguage_p.java index 2be1424ee..8bfd0b916 100644 --- a/htroot/ConfigLanguage_p.java +++ b/htroot/ConfigLanguage_p.java @@ -62,6 +62,7 @@ import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.tools.nxTools; public class ConfigLanguage_p { @@ -97,7 +98,7 @@ public class ConfigLanguage_p { ArrayList langVector; try{ URL u = new URL(url); - langVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig); + langVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig)); }catch(IOException e){ prop.put("status", 1);//unable to get url prop.put("status_url", url); diff --git a/htroot/ConfigSkins_p.java b/htroot/ConfigSkins_p.java index 3a68e33b6..5e77bf11f 100644 --- a/htroot/ConfigSkins_p.java +++ b/htroot/ConfigSkins_p.java @@ -61,6 +61,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.tools.nxTools; public class ConfigSkins_p { @@ -126,7 +127,7 @@ public class ConfigSkins_p { ArrayList skinVector; try{ URL u = new URL(url); - skinVector = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig); + skinVector = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig)); }catch(IOException e){ prop.put("status", 1);//unable to get URL prop.put("status_url", url); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index d382f7518..2f04a4b88 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -62,6 +62,7 @@ import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.tools.nxTools; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; @@ -130,7 +131,7 @@ public class sharedBlacklist_p { // get List URL u = new URL(downloadURL); - otherBlacklist = httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader); + otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 12000, null, null, switchboard.remoteProxyConfig,reqHeader)); } catch (Exception e) { prop.put("status", STATUS_PEER_UNKNOWN); prop.put("page", 1); @@ -146,7 +147,7 @@ public class sharedBlacklist_p { try { URL u = new URL(downloadURL); - otherBlacklist = httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig); //get List + otherBlacklist = nxTools.strings(httpc.wget(u, u.getHost(), 6000, null, null, switchboard.remoteProxyConfig)); //get List } catch (Exception e) { prop.put("status", STATUS_URL_PROBLEM); prop.put("status_address",downloadURL); diff --git a/htroot/xml/util/getpageinfo_p.java b/htroot/xml/util/getpageinfo_p.java index 4d9f3edaa..e9aa642b1 100644 --- a/htroot/xml/util/getpageinfo_p.java +++ b/htroot/xml/util/getpageinfo_p.java @@ -56,6 +56,7 @@ import de.anomic.http.httpc; import de.anomic.net.URL; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.tools.nxTools; public class getpageinfo_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { @@ -77,7 +78,7 @@ public class getpageinfo_p { } if (actions.indexOf("title")>=0) { try { - content = httpc.wget(new URL(url)); + content = nxTools.strings(httpc.wget(new URL(url))); Iterator it = content.iterator(); String line; diff --git a/source/de/anomic/http/httpc.java b/source/de/anomic/http/httpc.java index b01d8aad5..ee910c023 100644 --- a/source/de/anomic/http/httpc.java +++ b/source/de/anomic/http/httpc.java @@ -91,6 +91,7 @@ import de.anomic.server.serverCore; import de.anomic.server.serverFileUtils; import de.anomic.server.serverObjects; import de.anomic.server.logging.serverLog; +import de.anomic.tools.nxTools; /** * This class implements an http client. While http access is built-in in java @@ -1280,7 +1281,7 @@ do upload } } - public static ArrayList wget( + public static byte[] wget( URL url, String vhost, int timeout, @@ -1291,11 +1292,11 @@ do upload return wget(url, vhost,timeout,user,password,theRemoteProxyConfig,null); } - public static ArrayList wget(URL url) throws IOException{ + public static byte[] wget(URL url) throws IOException{ return wget(url, url.getHost(), 6000, null, null, plasmaSwitchboard.getSwitchboard().remoteProxyConfig, null); } - public static ArrayList wget( + public static byte[] wget( URL url, String vhost, int timeout, @@ -1352,15 +1353,7 @@ do upload } } - int s = 0; - int e; - ArrayList v = new ArrayList(); - while (s < a.length) { - e = s; while (e < a.length) if (a[e++] < 32) {e--; break;} - v.add(new String(a, s, e - s)); - s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;} - } - return v; + return a; } public static httpHeader whead( @@ -1466,7 +1459,7 @@ do upload httpRemoteProxyConfig theRemoteProxyConfig = httpRemoteProxyConfig.init(proxyHost,proxyPort); try { URL u = new URL(url); - text = wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig); + text = nxTools.strings(wget(u, u.getHost(), timeout, null, null, theRemoteProxyConfig)); } catch (MalformedURLException e) { System.out.println("The url '" + url + "' is wrong."); } catch (IOException e) { diff --git a/source/de/anomic/net/natLib.java b/source/de/anomic/net/natLib.java index 8b616b470..fce2d34b2 100644 --- a/source/de/anomic/net/natLib.java +++ b/source/de/anomic/net/natLib.java @@ -63,7 +63,7 @@ public class natLib { rm status.htm */ try { - ArrayList x = httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null); + ArrayList x = nxTools.strings(httpc.wget(new URL("http://192.168.0.1:80/status.htm"), "192.168.0.1", 5000, "admin", password, null)); x = nxTools.grep(x, 1, "IP Address"); if ((x == null) || (x.size() == 0)) return null; String line = nxTools.tail1(x); @@ -75,7 +75,7 @@ public class natLib { private static String getWhatIsMyIP() { try { - ArrayList x = httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null); + ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.whatismyip.com/"), "www.whatsmyip.com", 5000, null, null, null)); x = nxTools.grep(x, 0, "Your IP is"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 4); @@ -86,7 +86,7 @@ public class natLib { private static String getStanford() { try { - ArrayList x = httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null); + ArrayList x = nxTools.strings(httpc.wget(new URL("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"), "www.slac.stanford.edu", 5000, null, null, null)); x = nxTools.grep(x, 0, "firewall protecting your browser"); String line = nxTools.tail1(x); return nxTools.awk(line, " ", 7); @@ -97,7 +97,7 @@ public class natLib { private static String getIPID() { try { - ArrayList x = httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null); + ArrayList x = nxTools.strings(httpc.wget(new URL("http://ipid.shat.net/"), "ipid.shat.net", 5000, null, null, null)); x = nxTools.grep(x, 2, "Your IP address"); String line = nxTools.tail1(x); return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1); diff --git a/source/de/anomic/tools/loaderCore.java b/source/de/anomic/tools/loaderCore.java index c0ea0c7aa..4ef1d504f 100644 --- a/source/de/anomic/tools/loaderCore.java +++ b/source/de/anomic/tools/loaderCore.java @@ -41,7 +41,6 @@ package de.anomic.tools; -import java.util.ArrayList; import java.util.Properties; public abstract class loaderCore implements loaderProcess { @@ -63,7 +62,7 @@ public abstract class loaderCore implements loaderProcess { protected int completion = 0; // steering methods - public abstract void feed(ArrayList v); // returns true if process was successful; should be always synchronized + public abstract void feed(byte[] a); // returns true if process was successful; should be always synchronized public void terminate() { // if terminated before completion, completed() shows x < 100 diff --git a/source/de/anomic/tools/loaderProcess.java b/source/de/anomic/tools/loaderProcess.java index 531724f5f..246583c5c 100644 --- a/source/de/anomic/tools/loaderProcess.java +++ b/source/de/anomic/tools/loaderProcess.java @@ -41,13 +41,12 @@ package de.anomic.tools; -import java.util.ArrayList; import java.util.Properties; public interface loaderProcess { // steering methods - public void feed(ArrayList v); // returns true if process was successful; should be always synchronized + public void feed(byte[] v); // returns true if process was successful; should be always synchronized public void terminate(); // if terminated before completion, completed() shows x < 100 // feed-back methods diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index b54f1770d..011818aa3 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -133,7 +133,7 @@ public class loaderThreads { private URL url; private Exception error; private loaderProcess process; - private ArrayList page; + private byte[] page; private boolean loaded; public loaderThread(URL url, loaderProcess process) { @@ -193,16 +193,17 @@ public class loaderThreads { this.status = STATUS_READY; } - public synchronized void feed(ArrayList v) { + public synchronized void feed(byte[] v) { this.status = STATUS_RUNNING; this.completion = 1; int line = 0; String s, key, value; int p; + ArrayList lines = nxTools.strings(v); try { - while ((this.run) && (line < v.size())) { + while ((this.run) && (line < lines.size())) { // parse line and construct a property - s = (String) v.get(line); + s = (String) lines.get(line); if ((s != null) && ((p = s.indexOf('=')) > 0)) { key = s.substring(0, p).trim(); value = s.substring(p + 1).trim(); @@ -210,9 +211,9 @@ public class loaderThreads { } // update thread information line++; - this.completion = 100 * line / v.size(); + this.completion = 100 * line / lines.size(); } - if (line == v.size()) { + if (line == lines.size()) { this.status = STATUS_COMPLETED; return; } else { diff --git a/source/de/anomic/tools/nxTools.java b/source/de/anomic/tools/nxTools.java index b8cfc70f8..2eaf2664b 100644 --- a/source/de/anomic/tools/nxTools.java +++ b/source/de/anomic/tools/nxTools.java @@ -66,6 +66,10 @@ public class nxTools { return props; } + public static HashMap table(byte[] a) { + return table(strings(a)); + } + public static HashMap table(ArrayList list) { Iterator i = list.iterator(); int pos; @@ -78,7 +82,7 @@ public class nxTools { if (pos > 0) props.put(line.substring(0, pos).trim(), line.substring(pos + 1).trim()); } return props; - } + } public static Vector grep(Vector list, int afterContext, String pattern) { Enumeration i = list.elements(); @@ -144,6 +148,18 @@ public class nxTools { return null; } + public static ArrayList strings(byte[] a) { + int s = 0; + int e; + ArrayList v = new ArrayList(); + while (s < a.length) { + e = s; while (e < a.length) if (a[e++] < 32) {e--; break;} + v.add(new String(a, s, e - s)); + s = e; while (s < a.length) if (a[s++] >= 32) {s--; break;} + } + return v; + } + /** * This function shorten URL Strings
* diff --git a/source/de/anomic/yacy/yacyPeerActions.java b/source/de/anomic/yacy/yacyPeerActions.java index e472e9282..7aebb7313 100644 --- a/source/de/anomic/yacy/yacyPeerActions.java +++ b/source/de/anomic/yacy/yacyPeerActions.java @@ -61,6 +61,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverDate; import de.anomic.tools.disorderSet; +import de.anomic.tools.nxTools; public class yacyPeerActions { @@ -200,7 +201,7 @@ public class yacyPeerActions { yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)"); } else { ssc++; - seedList = httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader); + seedList = nxTools.strings(httpc.wget(url, url.getHost(), this.bootstrapLoadTimeout, null, null, this.sb.remoteProxyConfig,reqHeader)); enu = seedList.iterator(); lc = 0; while (enu.hasNext()) { @@ -254,7 +255,7 @@ public class yacyPeerActions { // read in remote file from url try { URL u = new URL(url); - ArrayList remote = httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig); + ArrayList remote = nxTools.strings(httpc.wget(u, u.getHost(), 5000, null, null, this.sb.remoteProxyConfig)); if ((remote != null) && (remote.size() > 0)) { Iterator e = remote.iterator(); while (e.hasNext()) { diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 3bfecd58c..fb28bda53 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -71,6 +71,7 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; +import de.anomic.tools.nxTools; public final class yacySeedDB { @@ -713,7 +714,7 @@ public final class yacySeedDB { httpHeader reqHeader = new httpHeader(); reqHeader.put(httpHeader.PRAGMA, "no-cache"); reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary? - ArrayList check = httpc.wget( + ArrayList check = nxTools.strings(httpc.wget( seedURL, seedURL.getHost(), 10000, @@ -721,7 +722,7 @@ public final class yacySeedDB { null, sb.remoteProxyConfig, reqHeader - ); + )); if (check == null) { serverLog.logFine("YACY","SaveSeedList: Testing download failed ...");