mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
refactoring
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2147 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
bc94a714b2
commit
cb295fbbdc
|
@ -1,4 +1,4 @@
|
|||
// indexEntryPrototype.java
|
||||
// indexAbstractEntry.java
|
||||
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
|
||||
// first published 20.05.2006 on http://www.anomic.de
|
||||
//
|
||||
|
@ -29,7 +29,7 @@ package de.anomic.index;
|
|||
//import de.anomic.plasma.plasmaURL;
|
||||
import de.anomic.plasma.plasmaWordIndex;
|
||||
|
||||
public abstract class indexEntryPrototype implements indexEntry {
|
||||
public abstract class indexAbstractEntry implements indexEntry {
|
||||
|
||||
// the associated hash
|
||||
protected String urlHash;
|
||||
|
@ -57,40 +57,40 @@ public abstract class indexEntryPrototype implements indexEntry {
|
|||
public abstract String toPropertyForm();
|
||||
|
||||
public void combineDistance(indexEntry oe) {
|
||||
this.worddistance = this.worddistance + ((indexEntryPrototype) oe).worddistance + Math.abs(this.posintext - ((indexEntryPrototype) oe).posintext);
|
||||
this.posintext = Math.min(this.posintext, ((indexEntryPrototype) oe).posintext);
|
||||
if (this.posofphrase != ((indexEntryPrototype) oe).posofphrase) this.posinphrase = 0; // (unknown)
|
||||
this.posofphrase = Math.min(this.posofphrase, ((indexEntryPrototype) oe).posofphrase);
|
||||
this.wordcount = (this.wordcount + ((indexEntryPrototype) oe).wordcount) / 2;
|
||||
this.worddistance = this.worddistance + ((indexAbstractEntry) oe).worddistance + Math.abs(this.posintext - ((indexAbstractEntry) oe).posintext);
|
||||
this.posintext = Math.min(this.posintext, ((indexAbstractEntry) oe).posintext);
|
||||
if (this.posofphrase != ((indexAbstractEntry) oe).posofphrase) this.posinphrase = 0; // (unknown)
|
||||
this.posofphrase = Math.min(this.posofphrase, ((indexAbstractEntry) oe).posofphrase);
|
||||
this.wordcount = (this.wordcount + ((indexAbstractEntry) oe).wordcount) / 2;
|
||||
}
|
||||
|
||||
public void min(indexEntry other) {
|
||||
if (this.hitcount > ((indexEntryPrototype) other).hitcount) this.hitcount = ((indexEntryPrototype) other).hitcount;
|
||||
if (this.wordcount > ((indexEntryPrototype) other).wordcount) this.wordcount = ((indexEntryPrototype) other).wordcount;
|
||||
if (this.phrasecount > ((indexEntryPrototype) other).phrasecount) this.phrasecount = ((indexEntryPrototype) other).phrasecount;
|
||||
if (this.posintext > ((indexEntryPrototype) other).posintext) this.posintext = ((indexEntryPrototype) other).posintext;
|
||||
if (this.posinphrase > ((indexEntryPrototype) other).posinphrase) this.posinphrase = ((indexEntryPrototype) other).posinphrase;
|
||||
if (this.posofphrase > ((indexEntryPrototype) other).posofphrase) this.posofphrase = ((indexEntryPrototype) other).posofphrase;
|
||||
if (this.worddistance > ((indexEntryPrototype) other).worddistance) this.worddistance = ((indexEntryPrototype) other).worddistance;
|
||||
if (this.lastModified > ((indexEntryPrototype) other).lastModified) this.lastModified = ((indexEntryPrototype) other).lastModified;
|
||||
if (this.quality > ((indexEntryPrototype) other).quality) this.quality = ((indexEntryPrototype) other).quality;
|
||||
if (this.hitcount > ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount;
|
||||
if (this.wordcount > ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount;
|
||||
if (this.phrasecount > ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount;
|
||||
if (this.posintext > ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext;
|
||||
if (this.posinphrase > ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase;
|
||||
if (this.posofphrase > ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase;
|
||||
if (this.worddistance > ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance;
|
||||
if (this.lastModified > ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) other).lastModified;
|
||||
if (this.quality > ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality;
|
||||
}
|
||||
|
||||
public void max(indexEntry other) {
|
||||
if (this.hitcount < ((indexEntryPrototype) other).hitcount) this.hitcount = ((indexEntryPrototype) other).hitcount;
|
||||
if (this.wordcount < ((indexEntryPrototype) other).wordcount) this.wordcount = ((indexEntryPrototype) other).wordcount;
|
||||
if (this.phrasecount < ((indexEntryPrototype) other).phrasecount) this.phrasecount = ((indexEntryPrototype) other).phrasecount;
|
||||
if (this.posintext < ((indexEntryPrototype) other).posintext) this.posintext = ((indexEntryPrototype) other).posintext;
|
||||
if (this.posinphrase < ((indexEntryPrototype) other).posinphrase) this.posinphrase = ((indexEntryPrototype) other).posinphrase;
|
||||
if (this.posofphrase < ((indexEntryPrototype) other).posofphrase) this.posofphrase = ((indexEntryPrototype) other).posofphrase;
|
||||
if (this.worddistance < ((indexEntryPrototype) other).worddistance) this.worddistance = ((indexEntryPrototype) other).worddistance;
|
||||
if (this.lastModified < ((indexEntryPrototype) other).lastModified) this.lastModified = ((indexEntryPrototype) other).lastModified;
|
||||
if (this.quality < ((indexEntryPrototype) other).quality) this.quality = ((indexEntryPrototype) other).quality;
|
||||
if (this.hitcount < ((indexAbstractEntry) other).hitcount) this.hitcount = ((indexAbstractEntry) other).hitcount;
|
||||
if (this.wordcount < ((indexAbstractEntry) other).wordcount) this.wordcount = ((indexAbstractEntry) other).wordcount;
|
||||
if (this.phrasecount < ((indexAbstractEntry) other).phrasecount) this.phrasecount = ((indexAbstractEntry) other).phrasecount;
|
||||
if (this.posintext < ((indexAbstractEntry) other).posintext) this.posintext = ((indexAbstractEntry) other).posintext;
|
||||
if (this.posinphrase < ((indexAbstractEntry) other).posinphrase) this.posinphrase = ((indexAbstractEntry) other).posinphrase;
|
||||
if (this.posofphrase < ((indexAbstractEntry) other).posofphrase) this.posofphrase = ((indexAbstractEntry) other).posofphrase;
|
||||
if (this.worddistance < ((indexAbstractEntry) other).worddistance) this.worddistance = ((indexAbstractEntry) other).worddistance;
|
||||
if (this.lastModified < ((indexAbstractEntry) other).lastModified) this.lastModified = ((indexAbstractEntry) other).lastModified;
|
||||
if (this.quality < ((indexAbstractEntry) other).quality) this.quality = ((indexAbstractEntry) other).quality;
|
||||
}
|
||||
|
||||
public void normalize(indexEntry mi, indexEntry ma) {
|
||||
indexEntryPrototype min = (indexEntryPrototype) mi;
|
||||
indexEntryPrototype max = (indexEntryPrototype) ma;
|
||||
indexAbstractEntry min = (indexAbstractEntry) mi;
|
||||
indexAbstractEntry max = (indexAbstractEntry) ma;
|
||||
this.hitcount = (this.hitcount == 0) ? 0 : 1 + 255 * (this.hitcount - min.hitcount ) / (1 + max.hitcount - min.hitcount);
|
||||
this.wordcount = (this.wordcount == 0) ? 0 : 1 + 255 * (this.wordcount - min.wordcount ) / (1 + max.wordcount - min.wordcount);
|
||||
this.phrasecount = (this.phrasecount == 0) ? 0 : 1 + 255 * (this.phrasecount - min.phrasecount ) / (1 + max.phrasecount - min.phrasecount);
|
||||
|
@ -103,7 +103,7 @@ public abstract class indexEntryPrototype implements indexEntry {
|
|||
}
|
||||
|
||||
public indexEntry generateNormalized(indexEntry min, indexEntry max) {
|
||||
indexEntry e = (indexEntryPrototype) this.clone();
|
||||
indexEntry e = (indexAbstractEntry) this.clone();
|
||||
e.normalize(min, max);
|
||||
return e;
|
||||
}
|
||||
|
@ -125,18 +125,18 @@ public abstract class indexEntryPrototype implements indexEntry {
|
|||
|
||||
public boolean isNewer(indexEntry other) {
|
||||
if (other == null) return true;
|
||||
if (this.lastModified > ((indexEntryPrototype) other).lastModified) return true;
|
||||
if (this.lastModified == ((indexEntryPrototype) other).getLastModified()) {
|
||||
if (this.quality > ((indexEntryPrototype) other).quality) return true;
|
||||
if (this.lastModified > ((indexAbstractEntry) other).lastModified) return true;
|
||||
if (this.lastModified == ((indexAbstractEntry) other).getLastModified()) {
|
||||
if (this.quality > ((indexAbstractEntry) other).quality) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isOlder(indexEntry other) {
|
||||
if (other == null) return false;
|
||||
if (this.lastModified < ((indexEntryPrototype) other).getLastModified()) return true;
|
||||
if (this.lastModified == ((indexEntryPrototype) other).getLastModified()) {
|
||||
if (this.quality < ((indexEntryPrototype) other).quality) return true;
|
||||
if (this.lastModified < ((indexAbstractEntry) other).getLastModified()) return true;
|
||||
if (this.lastModified == ((indexAbstractEntry) other).getLastModified()) {
|
||||
if (this.quality < ((indexAbstractEntry) other).quality) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
|
@ -30,10 +30,10 @@ import de.anomic.plasma.plasmaWordIndexEntryContainer;
|
|||
|
||||
public abstract class indexAbstractRI implements indexRI {
|
||||
|
||||
public boolean addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
|
||||
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
|
||||
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
|
||||
container.add(newEntry);
|
||||
return addEntries(container, updateTime, dhtCase) > 0;
|
||||
return addEntries(container, updateTime, dhtCase);
|
||||
}
|
||||
|
||||
public long getUpdateTime(String wordHash) {
|
||||
|
|
|
@ -45,7 +45,6 @@ package de.anomic.index;
|
|||
import java.util.Iterator;
|
||||
|
||||
import de.anomic.plasma.plasmaWordIndexEntryContainer;
|
||||
import de.anomic.plasma.plasmaWordIndexEntryInstance;
|
||||
|
||||
public interface indexRI {
|
||||
|
||||
|
@ -58,8 +57,8 @@ public interface indexRI {
|
|||
public plasmaWordIndexEntryContainer deleteContainer(String wordHash);
|
||||
|
||||
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete);
|
||||
public boolean addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
|
||||
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
|
||||
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase);
|
||||
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean dhtCase);
|
||||
|
||||
public void close(int waitingSeconds);
|
||||
|
||||
|
|
|
@ -158,16 +158,17 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) {
|
||||
if (ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) {
|
||||
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry entry, long updateTime, boolean dhtCase) {
|
||||
plasmaWordIndexEntryContainer c;
|
||||
if ((c = ramCache.addEntry(wordHash, entry, updateTime, dhtCase)) == null) {
|
||||
if (!dhtCase) flushControl();
|
||||
return true;
|
||||
return null;
|
||||
}
|
||||
return false;
|
||||
return c;
|
||||
}
|
||||
|
||||
public int addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) {
|
||||
int added = ramCache.addEntries(entries, updateTime, dhtCase);
|
||||
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer entries, long updateTime, boolean dhtCase) {
|
||||
plasmaWordIndexEntryContainer added = ramCache.addEntries(entries, updateTime, dhtCase);
|
||||
|
||||
// force flush
|
||||
if (!dhtCase) flushControl();
|
||||
|
@ -193,7 +194,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
private synchronized void flushCache(String wordHash) {
|
||||
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
|
||||
if (c != null) {
|
||||
plasmaWordIndexEntryContainer feedback = assortmentCluster.storeTry(wordHash, c);
|
||||
plasmaWordIndexEntryContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
|
||||
if (feedback != null) {
|
||||
backend.addEntries(feedback, System.currentTimeMillis(), true);
|
||||
}
|
||||
|
@ -290,7 +291,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
container.add(ramCache.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
|
||||
|
||||
// get from assortments
|
||||
container.add(assortmentCluster.getFromAll(wordHash, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
|
||||
container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime / 2), (maxTime < 0) ? -1 : maxTime / 2);
|
||||
|
||||
// get from backend
|
||||
if (maxTime > 0) {
|
||||
|
@ -331,7 +332,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
}
|
||||
|
||||
public int size() {
|
||||
return java.lang.Math.max(assortmentCluster.sizeTotal(),
|
||||
return java.lang.Math.max(assortmentCluster.size(),
|
||||
java.lang.Math.max(backend.size(), ramCache.size()));
|
||||
}
|
||||
|
||||
|
@ -351,13 +352,13 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
|
||||
public synchronized void close(int waitingBoundSeconds) {
|
||||
ramCache.close(waitingBoundSeconds);
|
||||
assortmentCluster.close();
|
||||
assortmentCluster.close(-1);
|
||||
backend.close(10);
|
||||
}
|
||||
|
||||
public synchronized plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
|
||||
plasmaWordIndexEntryContainer c = ramCache.deleteContainer(wordHash);
|
||||
c.add(assortmentCluster.removeFromAll(wordHash, -1), -1);
|
||||
c.add(assortmentCluster.deleteContainer(wordHash, -1), -1);
|
||||
c.add(backend.deleteContainer(wordHash), -1);
|
||||
return c;
|
||||
}
|
||||
|
@ -368,7 +369,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
synchronized (this) {
|
||||
removed = ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
|
||||
if (removed == urlHashes.length) return removed;
|
||||
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordHash, -1);
|
||||
plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordHash, -1);
|
||||
if (container != null) {
|
||||
removed += container.removeEntries(wordHash, urlHashes, deleteComplete);
|
||||
if (container.size() != 0) {
|
||||
|
@ -439,7 +440,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
|
||||
return new kelondroMergeIterator(
|
||||
ramCache.wordHashes(startWordHash, false),
|
||||
assortmentCluster.hashConjunction(startWordHash, true, false),
|
||||
assortmentCluster.wordHashes(startWordHash, true, false),
|
||||
kelondroNaturalOrder.naturalOrder,
|
||||
true);
|
||||
}
|
||||
|
@ -447,7 +448,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
return new kelondroMergeIterator(
|
||||
new kelondroMergeIterator(
|
||||
ramCache.wordHashes(startWordHash, false),
|
||||
assortmentCluster.hashConjunction(startWordHash, true, false),
|
||||
assortmentCluster.wordHashes(startWordHash, true, false),
|
||||
kelondroNaturalOrder.naturalOrder,
|
||||
true),
|
||||
backend.wordHashes(startWordHash, true, false),
|
||||
|
@ -505,7 +506,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
} else {
|
||||
// take out all words from the assortment to see if it fits
|
||||
// together with the extracted assortment
|
||||
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash, -1);
|
||||
plasmaWordIndexEntryContainer container = assortmentCluster.deleteContainer(wordhash, -1);
|
||||
if (size + container.size() > assortmentCluster.clusterCapacity) {
|
||||
// this will also be too big to integrate, add to entity
|
||||
entity.addEntries(container);
|
||||
|
@ -525,7 +526,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
|
|||
entity.deleteComplete();
|
||||
entity.close(); entity = null;
|
||||
// integrate the container into the assortments; this will work
|
||||
assortmentCluster.storeTry(wordhash, container);
|
||||
assortmentCluster.addEntries(container, container.updated(), false);
|
||||
return new Integer(size);
|
||||
} catch (kelondroException e) {
|
||||
// database corrupted, we simply give up the database and delete it
|
||||
|
|
|
@ -125,13 +125,13 @@ public final class plasmaWordIndexAssortment {
|
|||
if (log != null) log.logConfig("Created new Assortment Database, width " + assortmentLength + ", " + bufferkb + "kb buffer");
|
||||
}
|
||||
|
||||
public void store(String wordHash, plasmaWordIndexEntryContainer newContainer) {
|
||||
public void store(plasmaWordIndexEntryContainer newContainer) {
|
||||
// stores a word index to assortment database
|
||||
// this throws an exception if the word hash already existed
|
||||
//log.logDebug("storeAssortment: wordHash=" + wordHash + ", urlHash=" + entry.getUrlHash() + ", time=" + creationTime);
|
||||
if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size");
|
||||
byte[][] row = new byte[this.bufferStructureLength][];
|
||||
row[0] = wordHash.getBytes();
|
||||
row[0] = newContainer.wordHash().getBytes();
|
||||
row[1] = kelondroRecords.long2bytes(1, 4);
|
||||
row[2] = kelondroRecords.long2bytes(newContainer.updated(), 8);
|
||||
Iterator entries = newContainer.entries();
|
||||
|
|
|
@ -51,13 +51,15 @@ import java.io.IOException;
|
|||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
||||
import de.anomic.index.indexRI;
|
||||
import de.anomic.index.indexAbstractRI;
|
||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||
import de.anomic.kelondro.kelondroObjectCache;
|
||||
import de.anomic.kelondro.kelondroRecords;
|
||||
import de.anomic.kelondro.kelondroMergeIterator;
|
||||
import de.anomic.server.logging.serverLog;
|
||||
|
||||
public final class plasmaWordIndexAssortmentCluster {
|
||||
public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI implements indexRI {
|
||||
|
||||
// class variables
|
||||
private int clusterCount; // number of cluster files
|
||||
|
@ -95,37 +97,37 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
}
|
||||
}
|
||||
|
||||
private plasmaWordIndexEntryContainer storeSingular(String wordHash, plasmaWordIndexEntryContainer newContainer) {
|
||||
private plasmaWordIndexEntryContainer storeSingular(plasmaWordIndexEntryContainer newContainer) {
|
||||
// this tries to store the record. If the record does not fit, or a same hash already
|
||||
// exists and would not fit together with the new record, then the record is deleted from
|
||||
// the assortment(s) and returned together with the newRecord.
|
||||
// if storage was successful, NULL is returned.
|
||||
if (newContainer.size() > clusterCount) return newContainer; // it will not fit
|
||||
plasmaWordIndexEntryContainer buffer;
|
||||
while ((buffer = assortments[newContainer.size() - 1].remove(wordHash)) != null) {
|
||||
while ((buffer = assortments[newContainer.size() - 1].remove(newContainer.wordHash())) != null) {
|
||||
if (newContainer.add(buffer, -1) == 0) return newContainer; // security check; otherwise this loop does not terminate
|
||||
if (newContainer.size() > clusterCount) return newContainer; // it will not fit
|
||||
}
|
||||
// the assortment (newContainer.size() - 1) should now be empty. put it in there
|
||||
assortments[newContainer.size() - 1].store(wordHash, newContainer);
|
||||
assortments[newContainer.size() - 1].store(newContainer);
|
||||
// return null to show that we have stored the new Record successfully
|
||||
return null;
|
||||
}
|
||||
|
||||
private void storeForced(String wordHash, plasmaWordIndexEntryContainer newContainer) {
|
||||
private void storeForced(plasmaWordIndexEntryContainer newContainer) {
|
||||
// this stores the record and overwrites an existing record.
|
||||
// this is safe if we can be sure that the record does not exist before.
|
||||
if ((newContainer == null) || (newContainer.size() == 0) || (newContainer.size() > clusterCount)) return; // it will not fit
|
||||
assortments[newContainer.size() - 1].store(wordHash, newContainer);
|
||||
assortments[newContainer.size() - 1].store(newContainer);
|
||||
}
|
||||
|
||||
private void storeStretched(String wordHash, plasmaWordIndexEntryContainer newContainer) {
|
||||
private void storeStretched(plasmaWordIndexEntryContainer newContainer) {
|
||||
// this stores the record and stretches the storage over
|
||||
// all the assortments that are necessary to fit in the record
|
||||
// IMPORTANT: it must be ensured that the wordHash does not exist in the cluster before
|
||||
// i.e. by calling deleteContainer
|
||||
if (newContainer.size() <= clusterCount) {
|
||||
storeForced(wordHash, newContainer);
|
||||
storeForced(newContainer);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -144,20 +146,20 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
plasmaWordIndexEntryContainer c;
|
||||
Iterator i = newContainer.entries();
|
||||
for (int j = clusterStart; j >= 1; j--) {
|
||||
c = new plasmaWordIndexEntryContainer(wordHash);
|
||||
c = new plasmaWordIndexEntryContainer(newContainer.wordHash());
|
||||
for (int k = 0; k < j; k++) {
|
||||
if (i.hasNext()) {
|
||||
c.add((plasmaWordIndexEntryInstance) i.next(), newContainer.updated());
|
||||
} else {
|
||||
storeForced(wordHash, c);
|
||||
storeForced(c);
|
||||
return;
|
||||
}
|
||||
}
|
||||
storeForced(wordHash, c);
|
||||
storeForced(c);
|
||||
}
|
||||
}
|
||||
|
||||
public plasmaWordIndexEntryContainer storeTry(String wordHash, plasmaWordIndexEntryContainer newContainer) {
|
||||
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer newContainer, long creationTime, boolean dhtCase) {
|
||||
// this is called by the index ram cache flush process
|
||||
// it returns NULL if the storage was successful
|
||||
// it returns a new container if the given container cannot be stored
|
||||
|
@ -174,7 +176,7 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
int selectedAssortment = testsize - 1;
|
||||
while (selectedAssortment >= 0) {
|
||||
if (selectedAssortment + 1 <= need) {
|
||||
spaces[selectedAssortment] = (assortments[selectedAssortment].get(wordHash) == null) ? (selectedAssortment + 1) : 0;
|
||||
spaces[selectedAssortment] = (assortments[selectedAssortment].get(newContainer.wordHash()) == null) ? (selectedAssortment + 1) : 0;
|
||||
need -= spaces[selectedAssortment];
|
||||
assert (need >= 0);
|
||||
if (need == 0) break;
|
||||
|
@ -187,27 +189,31 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
Iterator i = newContainer.entries();
|
||||
for (int j = testsize - 1; j >= 0; j--) {
|
||||
if (spaces[j] == 0) continue;
|
||||
c = new plasmaWordIndexEntryContainer(wordHash);
|
||||
c = new plasmaWordIndexEntryContainer(newContainer.wordHash());
|
||||
for (int k = 0; k <= j; k++) {
|
||||
assert (i.hasNext());
|
||||
c.add((plasmaWordIndexEntryInstance) i.next(), newContainer.updated());
|
||||
}
|
||||
storeForced(wordHash, c);
|
||||
storeForced(c);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer);
|
||||
if (newContainer.size() <= clusterCount) newContainer = storeSingular(newContainer);
|
||||
if (newContainer == null) return null;
|
||||
|
||||
// clean up the whole thing and try to insert the container then
|
||||
newContainer.add(removeFromAll(wordHash, -1), -1);
|
||||
newContainer.add(deleteContainer(newContainer.wordHash(), -1), -1);
|
||||
if (newContainer.size() > clusterCapacity) return newContainer;
|
||||
storeStretched(wordHash, newContainer);
|
||||
storeStretched(newContainer);
|
||||
return null;
|
||||
}
|
||||
|
||||
public plasmaWordIndexEntryContainer removeFromAll(String wordHash, long maxTime) {
|
||||
public plasmaWordIndexEntryContainer deleteContainer(String wordHash) {
|
||||
return deleteContainer(wordHash, -1);
|
||||
}
|
||||
|
||||
public plasmaWordIndexEntryContainer deleteContainer(String wordHash, long maxTime) {
|
||||
// removes all records from all the assortments and returns them
|
||||
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
|
||||
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
|
||||
|
@ -221,7 +227,17 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
return record;
|
||||
}
|
||||
|
||||
public plasmaWordIndexEntryContainer getFromAll(String wordHash, long maxTime) {
|
||||
public int removeEntries(String wordHash, String[] referenceHashes, boolean deleteComplete) {
|
||||
plasmaWordIndexEntryContainer c = deleteContainer(wordHash, -1);
|
||||
int b = c.size();
|
||||
c.removeEntries(wordHash, referenceHashes, false);
|
||||
if (c.size() != 0) {
|
||||
addEntries(c, c.updated(), false);
|
||||
}
|
||||
return b - c.size();
|
||||
}
|
||||
|
||||
public plasmaWordIndexEntryContainer getContainer(String wordHash, boolean deleteIfEmpty, long maxTime) {
|
||||
// collect all records from all the assortments and return them
|
||||
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
|
||||
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
|
||||
|
@ -244,14 +260,22 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
return size;
|
||||
}
|
||||
|
||||
public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) throws IOException {
|
||||
public Iterator wordHashes(String startWordHash, boolean rot) {
|
||||
try {
|
||||
return wordHashes(startWordHash, true, rot);
|
||||
} catch (IOException e) {
|
||||
return new HashSet().iterator();
|
||||
}
|
||||
}
|
||||
|
||||
public Iterator wordHashes(String startWordHash, boolean up, boolean rot) throws IOException {
|
||||
HashSet iterators = new HashSet();
|
||||
//if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!");
|
||||
for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, rot));
|
||||
return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up);
|
||||
}
|
||||
|
||||
public int sizeTotal() {
|
||||
public int size() {
|
||||
int total = 0;
|
||||
for (int i = 0; i < clusterCount; i++) total += assortments[i].size();
|
||||
return total;
|
||||
|
@ -290,7 +314,7 @@ public final class plasmaWordIndexAssortmentCluster {
|
|||
return kelondroObjectCache.combinedStatus(a, a.length);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
public void close(int waitingSeconds) {
|
||||
for (int i = 0; i < clusterCount; i++) assortments[i].close();
|
||||
}
|
||||
|
||||
|
|
|
@ -416,7 +416,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
|
|||
return delCount;
|
||||
}
|
||||
|
||||
public int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
|
||||
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean dhtCase) {
|
||||
// this puts the entries into the cache, not into the assortment directly
|
||||
int added = 0;
|
||||
|
||||
|
@ -440,10 +440,10 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
|
|||
}
|
||||
entries = null;
|
||||
}
|
||||
return added;
|
||||
return null;
|
||||
}
|
||||
|
||||
public boolean addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
|
||||
public plasmaWordIndexEntryContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
|
||||
if (dhtCase) synchronized (kCache) {
|
||||
// put container into kCache
|
||||
plasmaWordIndexEntryContainer container = new plasmaWordIndexEntryContainer(wordHash);
|
||||
|
@ -451,7 +451,7 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
|
|||
kCache.put(new Long(updateTime + kCacheInc), container);
|
||||
kCacheInc++;
|
||||
if (kCacheInc > 10000) kCacheInc = 0;
|
||||
return true;
|
||||
return null;
|
||||
} else synchronized (wCache) {
|
||||
plasmaWordIndexEntryContainer container = (plasmaWordIndexEntryContainer) wCache.get(wordHash);
|
||||
if (container == null) container = new plasmaWordIndexEntryContainer(wordHash);
|
||||
|
@ -460,11 +460,11 @@ public final class plasmaWordIndexCache extends indexAbstractRI implements index
|
|||
wCache.put(wordHash, container);
|
||||
hashScore.incScore(wordHash);
|
||||
hashDate.setScore(wordHash, intTime(updateTime));
|
||||
return true;
|
||||
return null;
|
||||
}
|
||||
container = null;
|
||||
entries = null;
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -73,7 +73,7 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI
|
|||
}
|
||||
|
||||
public Iterator wordHashes(String startHash, boolean rot) {
|
||||
return wordHashes(startHash, rot);
|
||||
return wordHashes(startHash, true, rot);
|
||||
}
|
||||
|
||||
public Iterator wordHashes(String startHash, boolean up, boolean rot) {
|
||||
|
@ -240,23 +240,23 @@ public class plasmaWordIndexClassicDB extends indexAbstractRI implements indexRI
|
|||
}
|
||||
}
|
||||
|
||||
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
|
||||
public plasmaWordIndexEntryContainer addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
|
||||
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
|
||||
// fetch the index cache
|
||||
if ((container == null) || (container.size() == 0)) return 0;
|
||||
if ((container == null) || (container.size() == 0)) return null;
|
||||
|
||||
// open file
|
||||
plasmaWordIndexEntity pi = null;
|
||||
try {
|
||||
pi = new plasmaWordIndexEntity(databaseRoot, container.wordHash(), false);
|
||||
int count = pi.addEntries(container);
|
||||
pi.addEntries(container);
|
||||
|
||||
// close and return
|
||||
pi.close(); pi = null;
|
||||
return count;
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
log.logSevere("plasmaWordIndexClassic.addEntries: " + e.getMessage());
|
||||
return 0;
|
||||
return container;
|
||||
} finally {
|
||||
if (pi != null) try{pi.close();}catch (Exception e){}
|
||||
}
|
||||
|
|
|
@ -52,11 +52,11 @@ import java.util.Properties;
|
|||
|
||||
import de.anomic.index.indexEntry;
|
||||
import de.anomic.index.indexEntryAttribute;
|
||||
import de.anomic.index.indexEntryPrototype;
|
||||
import de.anomic.index.indexAbstractEntry;
|
||||
import de.anomic.index.indexURL;
|
||||
import de.anomic.kelondro.kelondroBase64Order;
|
||||
|
||||
public final class plasmaWordIndexEntryInstance extends indexEntryPrototype implements Cloneable, indexEntry {
|
||||
public final class plasmaWordIndexEntryInstance extends indexAbstractEntry implements Cloneable, indexEntry {
|
||||
|
||||
// an wordEntry can be filled in either of two ways:
|
||||
// by the discrete values of the entry
|
||||
|
|
|
@ -1312,7 +1312,7 @@ public final class yacy {
|
|||
WordHashIterator = WordIndex.wordHashes(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
|
||||
} else if (resource.equals("assortments")) {
|
||||
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log);
|
||||
WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true, false);
|
||||
WordHashIterator = assortmentCluster.wordHashes(wordChunkStartHash, true, false);
|
||||
} else if (resource.startsWith("assortment")) {
|
||||
int a = Integer.parseInt(resource.substring(10));
|
||||
plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null);
|
||||
|
|
Loading…
Reference in New Issue
Block a user