- added memory protection for the IndexCell migration

- fix for bad cell file selection

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5763 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-03-31 19:17:45 +00:00
parent 568e8f1741
commit 3621aa96ab
7 changed files with 42 additions and 21 deletions

View File

@ -356,7 +356,7 @@ public class Balancer {
public synchronized void push(final CrawlEntry entry) throws IOException {
assert entry != null;
if (urlFileIndex.has(entry.url().hash().getBytes())) {
Log.logWarning("BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed");
//Log.logWarning("BALANCER", "double-check has failed for urlhash " + entry.url().hash() + " in " + stackname + " - fixed");
return;
}

View File

@ -197,9 +197,9 @@ public class BLOBArray implements BLOB {
}
public synchronized File[] unmountSmallest(long maxResultSize) {
File f0 = smallestBLOB(null);
File f0 = smallestBLOB(null, maxResultSize);
if (f0 == null) return null;
File f1 = smallestBLOB(f0);
File f1 = smallestBLOB(f0, maxResultSize - f0.length());
if (f1 == null) return null;
unmountBLOB(f0, false);
@ -207,22 +207,23 @@ public class BLOBArray implements BLOB {
return new File[]{f0, f1};
}
public synchronized File unmountSmallestBLOB() {
return smallestBLOB(null);
public synchronized File unmountSmallestBLOB(long maxResultSize) {
return smallestBLOB(null, maxResultSize);
}
public synchronized File smallestBLOB(File excluding) {
public synchronized File smallestBLOB(File excluding, long maxsize) {
if (this.blobs.size() == 0) return null;
int bestIndex = -1;
long smallest = Long.MAX_VALUE;
for (int i = 0; i < this.blobs.size(); i++) {
if (excluding != null && this.blobs.get(i).location.getAbsolutePath().equals(excluding.getAbsoluteFile())) continue;
if (this.blobs.get(i).location == excluding) continue;
if (this.blobs.get(i).location.length() < smallest) {
smallest = this.blobs.get(i).location.length();
bestIndex = i;
}
}
if (bestIndex == -1) return null;
if (smallest > maxsize) return null;
return this.blobs.get(bestIndex).location;
}
@ -256,7 +257,7 @@ public class BLOBArray implements BLOB {
* @return
*/
public synchronized int entries() {
    // report zero instead of failing with a NullPointerException
    // when the blob list is not set
    return (this.blobs == null) ? 0 : this.blobs.size();
}
/**

View File

@ -398,6 +398,7 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
* @return
*/
public RowSet merge(RowSet c) {
assert c != null;
/*
if (this.isSorted() && this.size() >= c.size()) {
return mergeInsert(this, c);
@ -425,7 +426,7 @@ public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.E
* @return
*/
protected static RowSet mergeEnum(RowCollection c0, RowCollection c1) {
assert c0.rowdef == c1.rowdef;
assert c0.rowdef == c1.rowdef : c0.rowdef.toString() + " != " + c1.rowdef.toString();
RowSet r = new RowSet(c0.rowdef, c0.size() + c1.size());
c0.sort();
c1.sort();

View File

@ -36,6 +36,7 @@ import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.Order;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.server.serverProfiling;
/*
@ -79,6 +80,7 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
this.lastCleanup = System.currentTimeMillis();
this.targetFileSize = targetFileSize;
this.maxFileSize = maxFileSize;
cacheCleanup();
}
@ -94,14 +96,14 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
/**
 * Adds a container of references to the RAM cache, reports the new cache
 * size to the profiling facility, and then runs the dump and cleanup checks.
 *
 * @param newEntries the container to add
 * @throws IOException declared by this method's signature
 */
public synchronized void add(ReferenceContainer newEntries) throws IOException {
    this.ram.add(newEntries);
    serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
    // cacheDumpIfNecessary() already tests ram.size() against maxRamEntries
    // (and additionally the available memory), so no separate size check here
    cacheDumpIfNecessary();
    cacheCleanup();
}
/**
 * Adds a single reference entry for the given hash to the RAM cache, reports
 * the new cache size to the profiling facility, and then runs the dump and
 * cleanup checks.
 *
 * @param hash  the hash the entry is stored under
 * @param entry the reference entry to add
 * @throws IOException declared by this method's signature
 */
public synchronized void add(String hash, ReferenceRow entry) throws IOException {
    this.ram.add(hash, entry);
    serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
    // cacheDumpIfNecessary() already tests ram.size() against maxRamEntries
    // (and additionally the available memory), so no separate size check here
    cacheDumpIfNecessary();
    cacheCleanup();
}
@ -157,6 +159,7 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
return c0;
}
this.array.delete(wordHash);
cacheCleanup();
if (c0 == null) return c1;
return c1.merge(c0);
}
@ -271,6 +274,16 @@ public final class IndexCell extends AbstractBufferedIndex implements BufferedIn
* cache control methods
*/
/**
 * Dumps the RAM cache when it holds more than maxRamEntries entries or when
 * MemoryControl reports less than 20 MB available. An IOException raised by
 * the dump is printed to the error stream and not propagated.
 */
private synchronized void cacheDumpIfNecessary() {
    final boolean tooManyEntries = this.ram.size() > this.maxRamEntries;
    // memory is only queried when the entry limit has not been exceeded
    if (!tooManyEntries && MemoryControl.available() >= 20 * 1024 * 1024) {
        return;
    }
    try {
        cacheDump();
    } catch (IOException dumpFailure) {
        dumpFailure.printStackTrace();
    }
}
private synchronized void cacheDump() throws IOException {
// dump the ram
File dumpFile = this.array.newContainerBLOBFile();

View File

@ -206,13 +206,16 @@ public final class IndexCollectionMigration extends AbstractBufferedIndex implem
}
/**
 * Deletes the given word hash from the cell and, when a collections index is
 * present, from that index as well, and combines the two delete results.
 *
 * @param wordHash the word hash to delete
 * @return the merge of both delete results; the non-null one if only one
 *         side returned something; null if neither did
 * @throws IOException declared by this method's signature
 */
public ReferenceContainer delete(final String wordHash) throws IOException {
    // the cell is always asked first, the collections index only if it exists
    final ReferenceContainer fromCell = cell.delete(wordHash);
    final ReferenceContainer fromCollections =
            (this.collections == null) ? null : this.collections.delete(wordHash);
    if (fromCell == null) return fromCollections;
    if (fromCollections == null) return fromCell;
    // return the value produced by merge() rather than discarding it
    return fromCell.merge(fromCollections);
}
public boolean remove(final String wordHash, final String urlHash) throws IOException {

View File

@ -36,6 +36,7 @@ import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowSet;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.util.Log;
public final class ReferenceContainerArray {
@ -250,18 +251,21 @@ public final class ReferenceContainerArray {
File[] ff = this.array.unmountBestMatch(2.0, targetFileSize);
if (ff != null) {
Log.logInfo("RICELL-shrink", "doing unmountBestMatch(2.0, " + targetFileSize + ")");
merger.merge(ff[0], ff[1], this.array, this.payloadrow, newContainerBLOBFile());
return true;
}
ff = this.array.unmountSmallest(targetFileSize);
if (ff != null) {
Log.logInfo("RICELL-shrink", "doing unmountSmallest(" + targetFileSize + ")");
merger.merge(ff[0], ff[1], this.array, this.payloadrow, newContainerBLOBFile());
return true;
}
ff = this.array.unmountBestMatch(2.0, maxFileSize);
if (ff != null) {
Log.logInfo("RICELL-shrink", "doing unmountBestMatch(2.0, " + maxFileSize + ")");
merger.merge(ff[0], ff[1], this.array, this.payloadrow, newContainerBLOBFile());
return true;
}

View File

@ -421,13 +421,12 @@ public final class ReferenceContainerCache extends AbstractIndex implements Inde
public synchronized void add(final ReferenceContainer container) {
// this puts the entries into the cache
int added = 0;
if ((container == null) || (container.size() == 0)) return;
assert this.cache != null;
if (this.cache == null || container == null || container.size() == 0) return;
// put new words into cache
final String wordHash = container.getWordHash();
ReferenceContainer entries = cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
int added = 0;
if (entries == null) {
entries = container.topLevelClone();
added = entries.size();