- patch for bad web structure dumps

- added automatic slowdown of accesses to specific domains when access to a web page fails

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5765 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-04-01 13:21:47 +00:00
parent 0139988c04
commit b6c2167143
3 changed files with 31 additions and 1 deletions

View File

@ -45,6 +45,17 @@ public class Latency {
}
}
/**
 * Records a slow-down for the host stored under {@code hosthash}.
 * <p>
 * If the map has no entry for the hash yet, a fresh {@code Host} built from
 * {@code host} and the value 3000 is registered; otherwise the existing entry
 * is asked to slow itself down.
 *
 * @param hosthash key of the host entry; asserted to be exactly 6 characters long
 * @param host     host name handed to the {@code Host} constructor when a new
 *                 entry has to be created
 */
public static void slowdown(String hosthash, String host) {
    assert hosthash.length() == 6;
    final Host known = map.get(hosthash);
    if (known != null) {
        // entry already tracked: let it raise its own timing values
        known.slowdown();
        return;
    }
    // NOTE(review): 3000 is presumably an initial access time in milliseconds - confirm against the Host constructor
    map.put(hosthash, new Host(host, 3000));
}
public static Host host(String hosthash) {
assert hosthash.length() == 6;
return map.get(hosthash);
@ -172,6 +183,11 @@ public class Latency {
this.timeacc += time;
this.count++;
}
/**
 * Penalises this host entry: stamps the last access with the current time,
 * replaces the accumulated time with five times the current average (capped
 * at 60000) and resets the access counter to one.
 */
public void slowdown() {
    lastacc = System.currentTimeMillis();
    // average() is evaluated here, before count is overwritten below
    timeacc = Math.min(average() * 5, 60000);
    count = 1;
}
/**
 * @return the current value of this entry's access counter
 */
public int count() {
    return count;
}

View File

@ -59,8 +59,10 @@ import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.httpclient.protocol.Protocol;
import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
import de.anomic.crawler.Latency;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.util.Log;
import de.anomic.yacy.yacyURL;
/**
* HttpClient implementation which uses Jakarta Commons HttpClient 3.x {@link http://hc.apache.org/httpclient-3.x/}
@ -448,14 +450,20 @@ public class httpClient {
}
} catch (final IllegalThreadStateException e) {
// cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e;
} catch (final IOException e) {
// cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e;
} catch (final IllegalStateException e) {
// cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method));
throw new IOException(e.getMessage());
}

View File

@ -184,9 +184,15 @@ public class plasmaWebStructure {
final Map<String, Integer> map = new HashMap<String, Integer>();
String c;
final int refsc = refstr2count(refs);
int d;
for (int i = 0; i < refsc; i++) {
c = refs.substring(8 + i * 10, 8 + (i + 1) * 10);
map.put(c.substring(0, 6), Integer.valueOf(c.substring(6), 16));
try {
d = Integer.valueOf(c.substring(6), 16);
} catch (NumberFormatException e) {
d = 1;
}
map.put(c.substring(0, 6), d);
}
return map;
}