enhanced Host Balancer strategy: fair round robin

This commit is contained in:
orbiter 2014-04-23 23:11:37 +02:00
parent 0c88a32c36
commit 2f63bd0261

View File

@ -239,37 +239,34 @@ public class HostBalancer implements Balancer {
// refresh the round-robin cache
this.roundRobinHostHashes.addAll(this.queues.keySet());
// quickly get rid of small stacks to reduce number of files:
if (this.roundRobinHostHashes.size() > 100) {
// if there are stacks with less than 10 entries, remove all stacks with more than 10 entries
// this shall kick out small stacks to prevent that too many files are opened for very wide crawls
boolean smallStacksExist = false;
boolean singletonStacksExist = false;
smallsearch: for (String s: this.roundRobinHostHashes) {
HostQueue hq = this.queues.get(s);
if (hq != null) {
int size = hq.size();
if (size == 1) {singletonStacksExist = true; break smallsearch;}
if (size <= 10) {smallStacksExist = true; break smallsearch;}
}
// remove all stacks with more than 10 entries
// this shall kick out small stacks to prevent that too many files are opened for very wide crawls
boolean smallStacksExist = false;
boolean singletonStacksExist = false;
smallsearch: for (String s: this.roundRobinHostHashes) {
HostQueue hq = this.queues.get(s);
if (hq != null) {
int size = hq.size();
if (size == 1) {singletonStacksExist = true; break smallsearch;}
if (size <= 10) {smallStacksExist = true; break smallsearch;}
}
if (singletonStacksExist) {
Iterator<String> i = this.roundRobinHostHashes.iterator();
while (i.hasNext()) {
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (hq.size() != 1 && delta > 10) {i.remove();}
}
} else if (smallStacksExist) {
Iterator<String> i = this.roundRobinHostHashes.iterator();
while (i.hasNext()) {
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (hq.size() > 10 && delta > 10) {i.remove();}
}
if (singletonStacksExist || smallStacksExist) {
Iterator<String> i = this.roundRobinHostHashes.iterator();
smallstacks: while (i.hasNext()) {
if (this.roundRobinHostHashes.size() <= 10) break smallstacks; // don't shrink the hosts until nothing is left
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue smallstacks;}
int size = hq.size();
if (singletonStacksExist) {
if (size != 1) {i.remove(); continue smallstacks;}
} else {
if (size > 10) {i.remove(); continue smallstacks;}
}
// to protect all small stacks which have a fast throughput, remove all with long wainting time
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (delta >= 1000) {i.remove();}
}
}
}
@ -292,9 +289,7 @@ public class HostBalancer implements Balancer {
}
if (rhq == null) {
// second strategy: take from the largest stack and clean round robin cache
// if we would not clear the round robin cache afterwards
// then all targets would be accessed equally which makes this strategy useless
// second strategy: take from the largest stack
int largest = Integer.MIN_VALUE;
for (String h: this.roundRobinHostHashes) {
HostQueue hq = this.queues.get(h);
@ -306,14 +301,30 @@ public class HostBalancer implements Balancer {
}
}
}
this.roundRobinHostHashes.clear(); // start from the beginning next time
rhq = this.queues.get(rhh);
}
}
if (rhq == null) continue tryagain;
long timestamp = System.currentTimeMillis();
Request request = rhq.pop(delay, cs, robots); // this pop is outside of synchronization to prevent blocking of pushes
long actualwaiting = System.currentTimeMillis() - timestamp;
if (actualwaiting > 1000) {
synchronized (this) {
// to prevent that this occurs again, remove all stacks with positive delay times (which may be less after that waiting)
Iterator<String> i = this.roundRobinHostHashes.iterator();
protectcheck: while (i.hasNext()) {
if (this.roundRobinHostHashes.size() <= 3) break protectcheck; // don't shrink the hosts until nothing is left
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue protectcheck;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (delta >= 0) {i.remove();}
}
}
}
int size = rhq.size();
if (size == 0) {
synchronized (this) {