mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- patch for bad web structure dumps
- added automatic slowdown of accesses to specific domains when access to a web page fails

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5765 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
0139988c04
commit
b6c2167143
|
@ -45,6 +45,17 @@ public class Latency {
|
|||
}
|
||||
}
|
||||
|
||||
public static void slowdown(String hosthash, String host) {
|
||||
assert hosthash.length() == 6;
|
||||
Host h = map.get(hosthash);
|
||||
if (h == null) {
|
||||
h = new Host(host, 3000);
|
||||
map.put(hosthash, h);
|
||||
} else {
|
||||
h.slowdown();
|
||||
}
|
||||
}
|
||||
|
||||
public static Host host(String hosthash) {
|
||||
assert hosthash.length() == 6;
|
||||
return map.get(hosthash);
|
||||
|
@ -172,6 +183,11 @@ public class Latency {
|
|||
this.timeacc += time;
|
||||
this.count++;
|
||||
}
|
||||
public void slowdown() {
|
||||
this.lastacc = System.currentTimeMillis();
|
||||
this.timeacc = Math.min(60000, average() * 5);
|
||||
this.count = 1;
|
||||
}
|
||||
public int count() {
|
||||
return this.count;
|
||||
}
|
||||
|
|
|
@ -59,8 +59,10 @@ import org.apache.commons.httpclient.params.HttpMethodParams;
|
|||
import org.apache.commons.httpclient.protocol.Protocol;
|
||||
import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
|
||||
|
||||
import de.anomic.crawler.Latency;
|
||||
import de.anomic.kelondro.order.Base64Order;
|
||||
import de.anomic.kelondro.util.Log;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
|
||||
/**
|
||||
* HttpClient implementation which uses <a href="http://hc.apache.org/httpclient-3.x/">Jakarta Commons HttpClient 3.x</a>
|
||||
|
@ -448,14 +450,20 @@ public class httpClient {
|
|||
}
|
||||
} catch (final IllegalThreadStateException e) {
|
||||
// cleanUp statistics
|
||||
yacyURL url = new yacyURL(method.getURI().toString(), null);
|
||||
Latency.slowdown(url.hash().substring(6), url.getHost());
|
||||
HttpConnectionInfo.removeConnection(generateConInfo(method));
|
||||
throw e;
|
||||
} catch (final IOException e) {
|
||||
// cleanUp statistics
|
||||
yacyURL url = new yacyURL(method.getURI().toString(), null);
|
||||
Latency.slowdown(url.hash().substring(6), url.getHost());
|
||||
HttpConnectionInfo.removeConnection(generateConInfo(method));
|
||||
throw e;
|
||||
} catch (final IllegalStateException e) {
|
||||
// cleanUp statistics
|
||||
yacyURL url = new yacyURL(method.getURI().toString(), null);
|
||||
Latency.slowdown(url.hash().substring(6), url.getHost());
|
||||
HttpConnectionInfo.removeConnection(generateConInfo(method));
|
||||
throw new IOException(e.getMessage());
|
||||
}
|
||||
|
|
|
@ -184,9 +184,15 @@ public class plasmaWebStructure {
|
|||
final Map<String, Integer> map = new HashMap<String, Integer>();
|
||||
String c;
|
||||
final int refsc = refstr2count(refs);
|
||||
int d;
|
||||
for (int i = 0; i < refsc; i++) {
|
||||
c = refs.substring(8 + i * 10, 8 + (i + 1) * 10);
|
||||
map.put(c.substring(0, 6), Integer.valueOf(c.substring(6), 16));
|
||||
try {
|
||||
d = Integer.valueOf(c.substring(6), 16);
|
||||
} catch (NumberFormatException e) {
|
||||
d = 1;
|
||||
}
|
||||
map.put(c.substring(0, 6), d);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user