diff --git a/source/net/yacy/crawler/Balancer.java b/source/net/yacy/crawler/Balancer.java index 4926d3ffa..5389260f3 100644 --- a/source/net/yacy/crawler/Balancer.java +++ b/source/net/yacy/crawler/Balancer.java @@ -273,8 +273,9 @@ public class Balancer { assert s < this.urlFileIndex.size() : "hash = " + ASCII.String(hash) + ", s = " + s + ", size = " + this.urlFileIndex.size(); assert this.urlFileIndex.has(hash) : "hash = " + ASCII.String(hash); - // add the hash to a queue - pushHashToDomainStacks(entry.url().getHost(), entry.url().hash()); + // add the hash to a queue if the host is unknown, to get it into the balancer quickly + // now disabled to prevent a crawl from 'freezing' on a specific domain which hosts a lot of pages; the queues are filled anyway + //if (!this.domainStacks.containsKey(entry.url().getHost())) pushHashToDomainStacks(entry.url().getHost(), entry.url().hash()); } robots.ensureExist(entry.url(), Balancer.this.myAgentIDs, true); // concurrently load all robots.txt return null; diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index ee68896f0..094e5985d 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -376,7 +376,6 @@ public class Segment { int outlinksSame = document.inboundLinks().size(); int outlinksOther = document.outboundLinks().size(); final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult(); - int wordCount = 0; final int urlLength = urlNormalform.length(); final int urlComps = MultiProtocolURI.urlComps(url.toString()).length; @@ -409,7 +408,6 @@ public class Segment { } catch (final Exception e) { Log.logException(e); } - wordCount++; // during a search event it is possible that a heuristic is used which aquires index // data during search-time. To transfer indexed data directly to the search process