yacy_search_server/source/net/yacy/crawler/LegacyBalancer.java
2014-04-18 06:50:07 +02:00

636 lines
29 KiB
Java

// LegacyBalancer.java
// -----------------------
// part of YaCy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// created: 24.09.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.crawler;
import java.io.File;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.sorting.OrderedScoreMap;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.Latency;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.BufferedObjectIndex;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
public class LegacyBalancer implements Balancer {
private static final String indexSuffix = "A.db";
private static final int EcoFSBufferSize = 1000;
private static final int objectIndexBufferSize = 1000;
private static final int MAX_DOUBLE_PUSH_CHECK = 100000;
// class variables filled with external values
private final File cacheStacksPath;
private BufferedObjectIndex urlFileIndex;
// class variables computed during operation
private final ConcurrentMap<String, HostHandles> domainStacks; // a map from host name to lists with url hashs
private final HandleSet double_push_check; // for debugging
private long lastDomainStackFill;
private int domStackInitSize;
private final List<Map.Entry<String, byte[]>> zeroWaitingCandidates;
private final Random random; // used to alternate between choose-from-maxstack or choose from any zero-waiting
private static class HostHandles {
public String hosthash;
public HandleSet handleSet;
public HostHandles(final String hosthash, final HandleSet handleSet) {
this.hosthash = hosthash;
this.handleSet = handleSet;
}
}
public LegacyBalancer(
final File cachePath,
final String stackname,
final boolean useTailCache,
final boolean exceed134217727) {
this.cacheStacksPath = cachePath;
this.domainStacks = new ConcurrentHashMap<String, HostHandles>();
this.domStackInitSize = Integer.MAX_VALUE;
this.double_push_check = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 0);
this.zeroWaitingCandidates = new ArrayList<Map.Entry<String, byte[]>>();
this.random = new Random(System.currentTimeMillis());
// create a stack for newly entered entries
if (!(cachePath.exists())) cachePath.mkdir(); // make the path
this.cacheStacksPath.mkdirs();
final File f = new File(this.cacheStacksPath, stackname + indexSuffix);
try {
this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727, true), objectIndexBufferSize);
} catch (final SpaceExceededException e) {
try {
this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, 0, 0, false, exceed134217727, true), objectIndexBufferSize);
} catch (final SpaceExceededException e1) {
ConcurrentLog.logException(e1);
}
}
this.lastDomainStackFill = 0;
ConcurrentLog.info("Balancer", "opened balancer file with " + this.urlFileIndex.size() + " entries from " + f.toString());
}
@Override
public synchronized void close() {
if (this.urlFileIndex != null) {
this.urlFileIndex.close();
this.urlFileIndex = null;
}
}
@Override
public void clear() {
ConcurrentLog.info("Balancer", "cleaning balancer with " + this.urlFileIndex.size() + " entries from " + this.urlFileIndex.filename());
try {
this.urlFileIndex.clear();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
this.domainStacks.clear();
this.double_push_check.clear();
}
@Override
public Request get(final byte[] urlhash) throws IOException {
assert urlhash != null;
if (this.urlFileIndex == null) return null; // case occurs during shutdown
final Row.Entry entry = this.urlFileIndex.get(urlhash, false);
if (entry == null) return null;
return new Request(entry);
}
@Override
public int removeAllByProfileHandle(final String profileHandle, final long timeout) throws IOException, SpaceExceededException {
// removes all entries with a specific profile hash.
// this may last some time
// returns number of deletions
// first find a list of url hashes that shall be deleted
final HandleSet urlHashes = new RowHandleSet(this.urlFileIndex.row().primaryKeyLength, Base64Order.enhancedCoder, 100);
final long terminate = timeout == Long.MAX_VALUE ? Long.MAX_VALUE : (timeout > 0) ? System.currentTimeMillis() + timeout : Long.MAX_VALUE;
synchronized (this) {
final Iterator<Row.Entry> i = this.urlFileIndex.rows();
Row.Entry rowEntry;
Request crawlEntry;
while (i.hasNext() && (System.currentTimeMillis() < terminate)) {
rowEntry = i.next();
crawlEntry = new Request(rowEntry);
if (crawlEntry.profileHandle().equals(profileHandle)) {
urlHashes.put(crawlEntry.url().hash());
}
}
}
// then delete all these urls from the queues and the file index
return remove(urlHashes);
}
/**
* this method is only here, because so many import/export methods need it
and it was implemented in the previous architecture
however, usage is not recommended
* @param urlHashes, a list of hashes that shall be removed
* @return number of entries that had been removed
* @throws IOException
*/
@Override
public synchronized int remove(final HandleSet urlHashes) throws IOException {
final int s = this.urlFileIndex.size();
int removedCounter = 0;
for (final byte[] urlhash: urlHashes) {
final Row.Entry entry = this.urlFileIndex.remove(urlhash);
if (entry != null) removedCounter++;
// remove from double-check caches
this.double_push_check.remove(urlhash);
}
if (removedCounter == 0) return 0;
assert this.urlFileIndex.size() + removedCounter == s : "urlFileIndex.size() = " + this.urlFileIndex.size() + ", s = " + s;
// iterate through the domain stacks
final Iterator<Map.Entry<String, HostHandles>> q = this.domainStacks.entrySet().iterator();
HandleSet stack;
while (q.hasNext()) {
stack = q.next().getValue().handleSet;
for (final byte[] handle: urlHashes) stack.remove(handle);
if (stack.isEmpty()) q.remove();
}
// iterate through zero-waiting map
final Iterator<Map.Entry<String, byte[]>> i = this.zeroWaitingCandidates.iterator();
while (i.hasNext()) {
if (urlHashes.has(i.next().getValue())) i.remove();
}
return removedCounter;
}
@Override
public boolean has(final byte[] urlhashb) {
return this.urlFileIndex.has(urlhashb) || this.double_push_check.has(urlhashb);
}
@Override
public int size() {
return this.urlFileIndex.size();
}
@Override
public boolean isEmpty() {
return this.urlFileIndex.isEmpty();
}
/**
* push a crawl request on the balancer stack
* @param entry
* @return null if this was successful or a String explaining what went wrong in case of an error
* @throws IOException
* @throws SpaceExceededException
*/
@Override
public String push(final Request entry, CrawlProfile profile, final RobotsTxt robots) throws IOException, SpaceExceededException {
assert entry != null;
final byte[] hash = entry.url().hash();
synchronized (this) {
// double-check
if (this.double_push_check.has(hash)) return "double occurrence in double_push_check";
if (this.urlFileIndex.has(hash)) return "double occurrence in urlFileIndex";
if (this.double_push_check.size() > MAX_DOUBLE_PUSH_CHECK || MemoryControl.shortStatus()) this.double_push_check.clear();
this.double_push_check.put(hash);
// increase dom counter
if (profile != null && profile.domMaxPages() != Integer.MAX_VALUE && profile.domMaxPages() > 0) {
profile.domInc(entry.url().getHost());
}
// add to index
final int s = this.urlFileIndex.size();
this.urlFileIndex.put(entry.toRow());
assert s < this.urlFileIndex.size() : "hash = " + ASCII.String(hash) + ", s = " + s + ", size = " + this.urlFileIndex.size();
assert this.urlFileIndex.has(hash) : "hash = " + ASCII.String(hash);
// add the hash to a queue if the host is unknown to get this fast into the balancer
// now disabled to prevent that a crawl 'freezes' to a specific domain which hosts a lot of pages; the queues are filled anyway
//if (!this.domainStacks.containsKey(entry.url().getHost())) pushHashToDomainStacks(entry.url().getHost(), entry.url().hash());
}
robots.ensureExist(entry.url(), profile.getAgent(), true); // concurrently load all robots.txt
return null;
}
/**
* get a list of domains that are currently maintained as domain stacks
* @return a map of clear text strings of host names to an integer array: {the size of the domain stack, guessed delta waiting time}
*/
@Override
public Map<String, Integer[]> getDomainStackHosts(RobotsTxt robots) {
Map<String, Integer[]> map = new TreeMap<String, Integer[]>(); // we use a tree map to get a stable ordering
for (Map.Entry<String, HostHandles> entry: this.domainStacks.entrySet()) {
final String hostname = entry.getKey();
final HostHandles hosthandles = entry.getValue();
int size = hosthandles.handleSet.size();
int delta = Latency.waitingRemainingGuessed(hostname, hosthandles.hosthash, robots, ClientIdentification.yacyInternetCrawlerAgent);
map.put(hostname, new Integer[]{size, delta});
}
return map;
}
/**
* get lists of crawl request entries for a specific host
* @param host
* @param maxcount
* @param maxtime
* @return a list of crawl loader requests
*/
@Override
public List<Request> getDomainStackReferences(final String host, int maxcount, final long maxtime) {
final HostHandles hh = this.domainStacks.get(host);
if (hh == null) return new ArrayList<Request>(0);
final HandleSet domainList = hh.handleSet;
if (domainList.isEmpty()) return new ArrayList<Request>(0);
maxcount = Math.min(maxcount, domainList.size());
final ArrayList<Request> cel = new ArrayList<Request>(maxcount);
long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
for (int i = 0; i < maxcount; i++) {
final byte[] urlhash = domainList.getOne(i);
if (urlhash == null) continue;
Row.Entry rowEntry;
try {
rowEntry = this.urlFileIndex.get(urlhash, true);
} catch (final IOException e) {
continue;
}
if (rowEntry == null) continue;
Request crawlEntry;
try {
crawlEntry = new Request(rowEntry);
} catch (final IOException e) {
continue;
}
cel.add(crawlEntry);
if (System.currentTimeMillis() > timeout) break;
}
return cel;
}
private void pushHashToDomainStacks(String host, String hosthash, final byte[] urlhash) throws SpaceExceededException {
// extend domain stack
if (host == null) host = Domains.LOCALHOST;
HostHandles hh = this.domainStacks.get(host);
if (hh == null) {
// create new list
HandleSet domainList = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 1);
domainList.put(urlhash);
this.domainStacks.put(host, new HostHandles(hosthash, domainList));
} else {
HandleSet domainList = hh.handleSet;
// extend existent domain list
domainList.put(urlhash);
}
}
private void removeHashFromDomainStacks(String host, final byte[] urlhash) {
// reduce domain stack
if (host == null) host = Domains.LOCALHOST;
HostHandles hh = this.domainStacks.get(host);
if (hh == null) {
this.domainStacks.remove(host);
return;
}
HandleSet domainList = hh.handleSet;
domainList.remove(urlhash);
if (domainList.isEmpty()) this.domainStacks.remove(host);
}
/**
* get the next entry in this crawl queue in such a way that the domain access time delta is maximized
* and always above the given minimum delay time. An additional delay time is computed using the robots.txt
* crawl-delay time which is always respected. In case the minimum time cannot ensured, this method pauses
* the necessary time until the url is released and returned as CrawlEntry object. In case that a profile
* for the computed Entry does not exist, null is returned
* @param delay true if the requester demands forced delays using explicit thread sleep
* @param profile
* @return a url in a CrawlEntry object
* @throws IOException
* @throws SpaceExceededException
*/
@Override
public Request pop(final boolean delay, final CrawlSwitchboard cs, final RobotsTxt robots) throws IOException {
// returns a crawl entry from the stack and ensures minimum delta times
long sleeptime = 0;
Request crawlEntry = null;
CrawlProfile profileEntry = null;
byte[] failhash = null;
while (!this.urlFileIndex.isEmpty()) {
byte[] nexthash = getbest(robots, cs);
if (nexthash == null) return null;
synchronized (this) {
Row.Entry rowEntry = (nexthash == null) ? null : this.urlFileIndex.remove(nexthash);
if (rowEntry == null) continue;
crawlEntry = new Request(rowEntry);
//Log.logInfo("Balancer", "fetched next url: " + crawlEntry.url().toNormalform(true, false));
// check blacklist (again) because the user may have created blacklist entries after the queue has been filled
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, crawlEntry.url())) {
ConcurrentLog.fine("CRAWLER", "URL '" + crawlEntry.url() + "' is in blacklist.");
continue;
}
// at this point we must check if the crawlEntry has relevance because the crawl profile still exists
// if not: return null. A calling method must handle the null value and try again
profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle()));
if (profileEntry == null) {
ConcurrentLog.fine("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
continue;
}
// depending on the caching policy we need sleep time to avoid DoS-like situations
sleeptime = Latency.getDomainSleepTime(robots, profileEntry, crawlEntry.url());
assert Base64Order.enhancedCoder.equal(nexthash, rowEntry.getPrimaryKeyBytes()) : "result = " + ASCII.String(nexthash) + ", rowEntry.getPrimaryKeyBytes() = " + ASCII.String(rowEntry.getPrimaryKeyBytes());
assert Base64Order.enhancedCoder.equal(nexthash, crawlEntry.url().hash()) : "result = " + ASCII.String(nexthash) + ", crawlEntry.url().hash() = " + ASCII.String(crawlEntry.url().hash());
if (failhash != null && Base64Order.enhancedCoder.equal(failhash, nexthash)) break; // prevent endless loops
break;
}
}
if (crawlEntry == null) return null;
ClientIdentification.Agent agent = profileEntry == null ? ClientIdentification.yacyInternetCrawlerAgent : profileEntry.getAgent();
long robotsTime = Latency.getRobotsTime(robots, crawlEntry.url(), agent);
Latency.updateAfterSelection(crawlEntry.url(), profileEntry == null ? 0 : robotsTime);
if (delay && sleeptime > 0) {
// force a busy waiting here
// in best case, this should never happen if the balancer works properly
// this is only to protection against the worst case, where the crawler could
// behave in a DoS-manner
ConcurrentLog.info("BALANCER", "forcing crawl-delay of " + sleeptime + " milliseconds for " + crawlEntry.url().getHost() + ": " + Latency.waitingRemainingExplain(crawlEntry.url(), robots, agent) + ", domainStacks.size() = " + this.domainStacks.size() + ", domainStacksInitSize = " + this.domStackInitSize);
long loops = sleeptime / 1000;
long rest = sleeptime % 1000;
if (loops < 3) {
rest = rest + 1000 * loops;
loops = 0;
}
Thread.currentThread().setName("Balancer waiting for " +crawlEntry.url().getHost() + ": " + sleeptime + " milliseconds");
synchronized(this) {
// must be synchronized here to avoid 'takeover' moves from other threads which then idle the same time which would not be enough
if (rest > 0) {try {this.wait(rest);} catch (final InterruptedException e) {}}
for (int i = 0; i < loops; i++) {
ConcurrentLog.info("BALANCER", "waiting for " + crawlEntry.url().getHost() + ": " + (loops - i) + " seconds remaining...");
try {this.wait(1000); } catch (final InterruptedException e) {}
}
}
Latency.updateAfterSelection(crawlEntry.url(), robotsTime);
}
return crawlEntry;
}
private byte[] getbest(final RobotsTxt robots, final CrawlSwitchboard cs) {
synchronized (this.zeroWaitingCandidates) {
if (this.zeroWaitingCandidates.size() > 0) {
byte[] urlhash = pickFromZeroWaiting();
if (urlhash != null) return urlhash;
}
this.zeroWaitingCandidates.clear();
// check if we need to get entries from the file index
try {
fillDomainStacks();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
// iterate over the domain stacks
final Iterator<Map.Entry<String, HostHandles>> i = this.domainStacks.entrySet().iterator();
Map.Entry<String, HostHandles> entry;
OrderedScoreMap<Map.Entry<String, byte[]>> nextZeroCandidates = new OrderedScoreMap<Map.Entry<String, byte[]>>(null);
OrderedScoreMap<Map.Entry<String, byte[]>> failoverCandidates = new OrderedScoreMap<Map.Entry<String, byte[]>>(null);
int newCandidatesForward = 1;
while (i.hasNext() && nextZeroCandidates.size() < 1000) {
entry = i.next();
final String hostname = entry.getKey();
final HostHandles hosthandles = entry.getValue();
// clean up empty entries
if (hosthandles.handleSet.isEmpty()) {
i.remove();
continue;
}
final byte[] urlhash = hosthandles.handleSet.getOne(0);
if (urlhash == null) continue;
int w;
Row.Entry rowEntry;
try {
rowEntry = this.urlFileIndex.get(urlhash, false);
if (rowEntry == null) continue; // may have been deleted there manwhile
Request crawlEntry = new Request(rowEntry);
CrawlProfile profileEntry = cs.get(UTF8.getBytes(crawlEntry.profileHandle()));
if (profileEntry == null) {
ConcurrentLog.warn("Balancer", "no profile entry for handle " + crawlEntry.profileHandle());
continue;
}
w = Latency.waitingRemaining(crawlEntry.url(), robots, profileEntry.getAgent());
} catch (final IOException e1) {
ConcurrentLog.warn("Balancer", e1.getMessage(), e1);
continue;
}
if (w <= 0) {
if (w == Integer.MIN_VALUE) {
if (newCandidatesForward-- > 0) {
nextZeroCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(hostname, urlhash), 10000);
} else {
failoverCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(hostname, urlhash), 0);
}
} else {
nextZeroCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(hostname, urlhash), hosthandles.handleSet.size());
}
} else {
failoverCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(hostname, urlhash), w);
}
}
//Log.logInfo("Balancer", "*** getbest: created new nextZeroCandidates-list, size = " + nextZeroCandidates.size() + ", domainStacks.size = " + this.domainStacks.size());
if (!nextZeroCandidates.isEmpty()) {
// take some of the nextZeroCandidates and put the best into the zeroWaitingCandidates
int pick = nextZeroCandidates.size() <= 10 ? nextZeroCandidates.size() : Math.max(1, nextZeroCandidates.size() / 3);
Iterator<Map.Entry<String, byte[]>> k = nextZeroCandidates.keys(false);
while (k.hasNext() && pick-- > 0) {
this.zeroWaitingCandidates.add(k.next());
}
//Log.logInfo("Balancer", "*** getbest: created new zeroWaitingCandidates-list, size = " + zeroWaitingCandidates.size() + ", domainStacks.size = " + this.domainStacks.size());
return pickFromZeroWaiting();
}
if (!failoverCandidates.isEmpty()) {
// bad luck: just take that one with least waiting
Iterator<Map.Entry<String, byte[]>> k = failoverCandidates.keys(true);
String besthost;
byte[] besturlhash;
Map.Entry<String, byte[]> hosthash;
while (k.hasNext()) {
hosthash = k.next();
//if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this!
besthost = hosthash.getKey();
besturlhash = hosthash.getValue();
removeHashFromDomainStacks(besthost, besturlhash);
//Log.logInfo("Balancer", "*** getbest: no zero waiting candidates, besthost = " + besthost);
return besturlhash;
}
}
//Log.logInfo("Balancer", "*** getbest: besturlhash == null");
return null; // this should never happen
}
}
private byte[] pickFromZeroWaiting() {
// by random we choose now either from the largest stack or from any of the other stacks
String host = null;
byte[] hash = null;
while (this.zeroWaitingCandidates.size() > 0) {
Map.Entry<String, byte[]> z = this.zeroWaitingCandidates.remove(this.random.nextInt(this.zeroWaitingCandidates.size()));
HostHandles hh = this.domainStacks.get(z.getKey());
if (hh == null) continue;
host = z.getKey(); if (host == null) continue;
hash = z.getValue(); if (hash == null) continue;
removeHashFromDomainStacks(host, hash);
ConcurrentLog.info("Balancer", "// getbest: picked a random from the zero-waiting stack: " + host + ", zeroWaitingCandidates.size = " + this.zeroWaitingCandidates.size());
return hash;
}
//Log.logInfo("Balancer", "*** getbest: picking from zero-waiting stack failed!" + " zeroWaitingCandidates.size = " + this.zeroWaitingCandidates.size());
this.zeroWaitingCandidates.clear();
return null;
}
private void fillDomainStacks() throws IOException {
if (!this.domainStacks.isEmpty() && System.currentTimeMillis() - this.lastDomainStackFill < 60000L) return;
this.domainStacks.clear();
this.lastDomainStackFill = System.currentTimeMillis();
final HandleSet blackhandles = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 10);
String host;
Request request;
int count = 0;
long timeout = System.currentTimeMillis() + 5000;
for (Row.Entry entry: this.urlFileIndex.random(10000)) {
if (entry == null) continue;
request = new Request(entry);
// check blacklist (again) because the user may have created blacklist entries after the queue has been filled
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, request.url())) {
ConcurrentLog.fine("CRAWLER", "URL '" + request.url() + "' is in blacklist.");
try {blackhandles.put(entry.getPrimaryKeyBytes());} catch (final SpaceExceededException e) {}
continue;
}
host = request.url().getHost();
try {
pushHashToDomainStacks(host, request.url().hosthash(), entry.getPrimaryKeyBytes());
} catch (final SpaceExceededException e) {
break;
}
count++;
if (this.domainStacks.size() >= 1000 || count >= 100000 || System.currentTimeMillis() > timeout) break;
}
// if we collected blacklist entries then delete them now
for (byte[] blackhandle: blackhandles) this.urlFileIndex.remove(blackhandle);
ConcurrentLog.info("BALANCER", "re-fill of domain stacks; fileIndex.size() = " + this.urlFileIndex.size() + ", domainStacks.size = " + this.domainStacks.size() + ", blackhandles = " + blackhandles.size() + ", collection time = " + (System.currentTimeMillis() - this.lastDomainStackFill) + " ms");
this.domStackInitSize = this.domainStacks.size();
}
@Override
public Iterator<Request> iterator() throws IOException {
return new EntryIterator();
}
private class EntryIterator implements Iterator<Request> {
private Iterator<Row.Entry> rowIterator;
public EntryIterator() throws IOException {
this.rowIterator = LegacyBalancer.this.urlFileIndex.rows();
}
@Override
public boolean hasNext() {
return (this.rowIterator == null) ? false : this.rowIterator.hasNext();
}
@Override
public Request next() {
final Row.Entry entry = this.rowIterator.next();
try {
return (entry == null) ? null : new Request(entry);
} catch (final IOException e) {
ConcurrentLog.logException(e);
this.rowIterator = null;
return null;
}
}
@Override
public void remove() {
if (this.rowIterator != null) this.rowIterator.remove();
}
}
@Override
public int removeAllByHostHashes(Set<String> hosthashes) {
return 0;
}
}