fixed missing remove operation in balancer

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4990 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2008-07-11 12:03:18 +00:00
parent 606b323a2d
commit 05c26d58d9
3 changed files with 106 additions and 75 deletions

View File

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.592
releaseVersion=0.593
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

View File

@ -241,6 +241,20 @@ public class Balancer {
if (urlHashes.contains(h)) j.remove();
}
// iterate through the domain stacks
Iterator<Map.Entry<String, LinkedList<String>>> k = domainStacks.entrySet().iterator();
Map.Entry<String, LinkedList<String>> se;
LinkedList<String> stack;
while (k.hasNext()) {
se = k.next();
stack = se.getValue();
i = stack.iterator();
while (i.hasNext()) {
if (urlHashes.contains(i.next())) i.remove();
}
if (stack.size() == 0) k.remove();
}
return removedCounter;
}
@ -256,13 +270,15 @@ public class Balancer {
/**
 * Returns the number of entries reported by {@code urlFileIndex}.
 *
 * The cross-check of that number against the sum of the file stack, the RAM
 * stack and the domain stacks is commented out below, so this method performs
 * no consistency validation.
 *
 * @return the value of {@code urlFileIndex.size()}
 */
public int size() {
// the index is the only structure consulted for the result
int componentsize = urlFileIndex.size();
// Disabled consistency assertion, kept for reference.
// NOTE(review): in the message below, the expression
//   ", componentsize = " + urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks()
// is evaluated left to right as string concatenation, so the three sizes would be
// printed as adjacent digits instead of being added; parenthesise the sum before
// re-enabling the assertion.
/*
assert componentsize == urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks() :
"size wrong in " + stackname +
" - urlFileIndex = " + urlFileIndex.size() +
", componentsize = " + componentsize +
", componentsize = " + urlFileStack.size() + urlRAMStack.size() + sizeDomainStacks() +
" = (urlFileStack = " + urlFileStack.size() +
", urlRAMStack = " + urlRAMStack.size() +
", sizeDomainStacks = " + sizeDomainStacks() + ")";
*/
return componentsize;
}

View File

@ -56,6 +56,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.concurrent.ConcurrentHashMap;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
@ -78,6 +79,7 @@ public class RobotsTxt {
kelondroMap robotsTable;
private final File robotsTableFile;
private ConcurrentHashMap<String, Long> syncObjects;
//private static final HashSet<String> loadedRobots = new HashSet<String>(); // only for debugging
public RobotsTxt(File robotsTableFile) {
@ -94,6 +96,7 @@ public class RobotsTxt {
blob = new kelondroBLOBTree(robotsTableFile, true, true, 256, 512, '_', kelondroNaturalOrder.naturalOrder, false, false, true);
}
robotsTable = new kelondroMap(blob, 100);
syncObjects = new ConcurrentHashMap<String, Long>();
}
private void resetDatabase() {
@ -133,7 +136,18 @@ public class RobotsTxt {
robotsTxt4Host == null ||
robotsTxt4Host.getLoadedDate() == null ||
System.currentTimeMillis() - robotsTxt4Host.getLoadedDate().getTime() > 7*24*60*60*1000
)) synchronized(this) {
)) {
// make or get a synchronization object
Long syncObj = this.syncObjects.get(urlHostPort);
if (syncObj == null) {
syncObj = new Long(System.currentTimeMillis());
this.syncObjects.put(urlHostPort, syncObj);
}
// we can now synchronize for each host separately
synchronized (syncObj) {
// if we have not found any data or the data is older than 7 days, we need to load it from the remote server
// check the robots table again for all threads that come here because they waited for another one
@ -215,6 +229,7 @@ public class RobotsTxt {
(Integer) parserResult[2]);
}
}
}
return robotsTxt4Host;
}