final version of collection entry type definition

- the test phase of the new collection data structure is finished
- the test data that had been generated during that phase is void; there will be no migration
- the new collection files are located in DATA/INDEX/PUBLIC/TEXT/RICOLLECTION
- the old index dump is also void; there will be no migration
- the new index dump is located in DATA/INDEX/PUBLIC/TEXT/RICACHE

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2983 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 2006-11-19 20:05:25 +00:00
parent 58d79a9ac1
commit e3d75f42bd
19 changed files with 290 additions and 229 deletions
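
The mechanical core of the diff below is a format flag: indexContainer gains a boolean newRWI, and every place that turns a stored row back into an entry branches on it, yielding indexRWIEntryNew for the new collection format and indexRWIEntryOld otherwise. A minimal self-contained sketch of that pattern, with hypothetical stand-in types rather than the real YaCy classes:

interface RWIEntry { }

class OldEntry implements RWIEntry {
    OldEntry(byte[] row) { /* decode the old row layout */ }
}

class NewEntry implements RWIEntry {
    NewEntry(byte[] row) { /* decode the new row layout */ }
}

class Container {
    private final boolean newRWI; // fixed at construction, as in indexContainer

    Container(boolean newRWI) { this.newRWI = newRWI; }

    // every site that rematerializes a stored row branches on the flag exactly once
    RWIEntry decode(byte[] row) {
        return this.newRWI ? new NewEntry(row) : new OldEntry(row);
    }
}

Fixing the flag at construction time means a single container can never mix decoded entries of the two row formats.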

View File

@ -304,7 +304,8 @@ public class IndexControl_p {
}
i++;
}
prop.put("keyhashsimilar_rows", rows);
prop.put("keyhashsimilar_rows_"+rows+"_cols", cols);
prop.put("keyhashsimilar_rows", rows + 1);
prop.put("result", "");
} catch (IOException e) {
prop.put("result", "unknown keys: " + e.getMessage());
@ -439,7 +440,7 @@ public class IndexControl_p {
prop.put("genUrlList_keyHash", keyhash);
if (index.size() == 0) {
if ((index == null) || (index.size() == 0)) {
prop.put("genUrlList", 1);
} else {
final Iterator en = index.entries();

View File

@ -83,7 +83,6 @@ import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;

View File

@ -86,10 +86,6 @@ public class indexCachedRI implements indexRI {
return entries.updated();
}
public indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, payloadrow);
}
public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean intern) {
// add the entry
if (intern) {
@ -219,10 +215,9 @@ public class indexCachedRI implements indexRI {
}
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, payloadrow);
c.add(riIntern.deleteContainer(wordHash), -1);
c.add(riExtern.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1);
indexContainer c = riIntern.deleteContainer(wordHash);
if (c == null) c = riExtern.deleteContainer(wordHash); else c.add(riExtern.deleteContainer(wordHash), -1);
if (c == null) c = backend.deleteContainer(wordHash); else c.add(backend.deleteContainer(wordHash), -1);
return c;
}
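
The rewritten deleteContainer above no longer allocates an empty container up front: the first backend that returns a non-null result becomes the accumulator, later non-null results are merged into it, and a null from every backend propagates as null. A generalized sketch of that null-safe merge, with a hypothetical Source interface standing in for the riIntern/riExtern/backend triple:

import java.util.List;

class MergeSketch {
    interface Source { List<String> deleteAll(); } // may return null

    static List<String> deleteEverywhere(Source... sources) {
        List<String> acc = null;
        for (Source s : sources) {
            List<String> part = s.deleteAll();
            if (part == null) continue;
            if (acc == null) acc = part; // adopt the first hit as-is
            else acc.addAll(part);       // merge subsequent hits into it
        }
        return acc; // null when no backend held entries for the key
    }
}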

View File

@ -104,7 +104,7 @@ public class indexCollectionRI implements indexRI {
byte[] key = (byte[]) oo[0];
kelondroRowSet collection = (kelondroRowSet) oo[1];
if (collection == null) return null;
return new indexContainer(new String(key), collection);
return new indexContainer(new String(key), collection, true);
}
public void remove() {
@ -118,7 +118,7 @@ public class indexCollectionRI implements indexRI {
kelondroRowSet collection = collectionIndex.get(wordHash.getBytes(), deleteIfEmpty);
if (collection != null) collection.select(urlselection);
if ((collection == null) || (collection.size() == 0)) return null;
return new indexContainer(wordHash, collection);
return new indexContainer(wordHash, collection, true);
} catch (IOException e) {
return null;
}
@ -128,7 +128,7 @@ public class indexCollectionRI implements indexRI {
try {
kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
if (collection == null) return null;
return new indexContainer(wordHash, collection);
return new indexContainer(wordHash, collection, true);
} catch (IOException e) {
return null;
}
@ -153,7 +153,7 @@ public class indexCollectionRI implements indexRI {
}
public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow(), true);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}

View File

@ -41,30 +41,34 @@ import de.anomic.kelondro.kelondroRowSet;
public class indexContainer extends kelondroRowSet {
private String wordHash;
private boolean newRWI;
public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache) {
public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache, boolean newRWI) {
super(rowdef, objectCount, cache, kelondroBase64Order.enhancedCoder, 0, 0);
this.wordHash = wordHash;
this.newRWI = newRWI;
}
public indexContainer(String wordHash, kelondroRow rowdef) {
this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0);
public indexContainer(String wordHash, kelondroRow rowdef, boolean newRWI) {
this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0, newRWI);
}
public indexContainer(String wordHash, kelondroRowSet collection) {
public indexContainer(String wordHash, kelondroRowSet collection, boolean newRWI) {
super(collection);
this.wordHash = wordHash;
this.newRWI = newRWI;
}
public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column) {
public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column, boolean newRWI) {
super(rowdef);
this.wordHash = wordHash;
this.lastTimeWrote = 0;
this.setOrdering(ordering, column);
this.newRWI = newRWI;
}
public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn);
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn, this.newRWI);
newContainer.add(this, -1);
return newContainer;
}
@ -123,7 +127,11 @@ public class indexContainer extends kelondroRowSet {
if (oldEntryRow == null) {
return true;
} else {
indexRWIEntry oldEntry = new indexRWIEntryOld(oldEntryRow); // FIXME: see if cloning is necessary
indexRWIEntry oldEntry;
if (entry instanceof indexRWIEntryNew)
oldEntry = new indexRWIEntryNew(oldEntryRow);
else
oldEntry = new indexRWIEntryOld(oldEntryRow); // FIXME: see if cloning is necessary
if (entry.isOlder(oldEntry)) { // A more recent Entry is already in this container
this.put(oldEntry.toKelondroEntry()); // put it back
return false;
@ -136,13 +144,19 @@ public class indexContainer extends kelondroRowSet {
public indexRWIEntry get(String urlHash) {
kelondroRow.Entry entry = this.get(urlHash.getBytes());
if (entry == null) return null;
return new indexRWIEntryOld(entry);
if (this.newRWI)
return new indexRWIEntryNew(entry);
else
return new indexRWIEntryOld(entry);
}
public indexRWIEntry remove(String urlHash) {
kelondroRow.Entry entry = this.remove(urlHash.getBytes());
if (entry == null) return null;
return new indexRWIEntryOld(entry);
if (this.newRWI)
return new indexRWIEntryNew(entry);
else
return new indexRWIEntryOld(entry);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
@ -178,7 +192,10 @@ public class indexContainer extends kelondroRowSet {
public Object next() {
kelondroRow.Entry rentry = (kelondroRow.Entry) rowEntryIterator.next();
if (rentry == null) return null;
return new indexRWIEntryOld(rentry);
if (newRWI)
return new indexRWIEntryNew(rentry);
else
return new indexRWIEntryOld(rentry);
}
public void remove() {
@ -288,7 +305,7 @@ public class indexContainer extends kelondroRowSet {
assert small.rowdef.equals(large.rowdef) : "small = " + small.rowdef.toString() + "; large = " + large.rowdef.toString();
int keylength = small.rowdef.width(0);
assert (keylength == large.rowdef.width(0));
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, small.rowdef, small.newRWI); // start with empty search result
Iterator se = small.entries();
indexRWIEntry ie0, ie1;
long stamp = System.currentTimeMillis();
@ -311,7 +328,7 @@ public class indexContainer extends kelondroRowSet {
assert i1.rowdef.equals(i2.rowdef) : "i1 = " + i1.rowdef.toString() + "; i2 = " + i2.rowdef.toString();
int keylength = i1.rowdef.width(0);
assert (keylength == i2.rowdef.width(0));
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result
indexContainer conj = new indexContainer(null, i1.rowdef, i1.newRWI); // start with empty search result
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();

View File

@ -58,6 +58,7 @@ public final class indexRAMRI implements indexRI {
private String indexArrayFileName;
private kelondroRow payloadrow;
private kelondroRow bufferStructureBasis;
private boolean newRWI;
// calculated constants
private static String maxKey;
@ -66,7 +67,7 @@ public final class indexRAMRI implements indexRI {
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log, boolean newRWI) {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
@ -78,6 +79,7 @@ public final class indexRAMRI implements indexRI {
this.cacheMaxCount = 10000;
this.cacheReferenceLimit = wCacheReferenceLimitInit;
this.log = log;
this.newRWI = newRWI;
this.indexArrayFileName = dumpname;
this.payloadrow = payloadrow;
this.bufferStructureBasis = new kelondroRow(
@ -178,7 +180,10 @@ public final class indexRAMRI implements indexRI {
if ((row == null) || (row.empty(0)) || (row.empty(3))) continue;
wordHash = row.getColString(0, "UTF-8");
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new indexRWIEntryOld(row.getColBytes(3));
if (newRWI)
wordEntry = new indexRWIEntryNew(row.getColBytes(3));
else
wordEntry = new indexRWIEntryOld(row.getColBytes(3));
// store to cache
addEntry(wordHash, wordEntry, startTime, false);
urlCount++;
@ -421,25 +426,29 @@ public final class indexRAMRI implements indexRI {
public synchronized indexContainer addEntries(indexContainer container, long updateTime, boolean dhtCase) {
// this puts the entries into the cache, not into the assortment directly
int added = 0;
if ((container == null) || (container.size() == 0)) return null;
// put new words into cache
// put container into wCache
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexContainer(wordHash, container.row());
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) {
entries = container.topLevelClone();
added = entries.size();
} else {
added = entries.add(container, -1);
if (added > 0) {
cache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
}
if (added > 0) {
cache.put(wordHash, entries);
hashScore.addScore(wordHash, added);
hashDate.setScore(wordHash, intTime(updateTime));
}
entries = null;
return null;
}
public synchronized indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
if (container == null) container = new indexContainer(wordHash, this.payloadrow, newEntry instanceof indexRWIEntryNew);
indexRWIEntry[] entries = new indexRWIEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);
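
addEntries above now handles a cache miss by cloning the incoming container (topLevelClone), so the cached copy keeps the container's row definition and newRWI flag instead of being rebuilt from container.row(); on a hit the entries are merged into the cached container. A rough sketch of that miss/hit split, with a hypothetical CacheEntry type in place of indexContainer:

import java.util.HashMap;
import java.util.Map;

class RamCacheSketch {
    interface CacheEntry {
        CacheEntry topLevelClone();
        int addAll(CacheEntry other); // returns how many entries were new
        int size();
    }

    final Map<String, CacheEntry> cache = new HashMap<>();

    void addEntries(String wordHash, CacheEntry incoming) {
        CacheEntry cached = cache.get(wordHash);
        int added;
        if (cached == null) {
            cached = incoming.topLevelClone(); // miss: adopt a full clone
            added = cached.size();
        } else {
            added = cached.addAll(incoming);   // hit: merge in place
        }
        if (added > 0) cache.put(wordHash, cached);
    }
}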

View File

@ -52,7 +52,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
new kelondroColumn("y", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "lother"),
new kelondroColumn("m", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "urlLength"),
new kelondroColumn("n", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "urlComps"),
new kelondroColumn("g", kelondroColumn.celltype_string, kelondroColumn.encoder_bytes, 1, "typeofword"),
new kelondroColumn("g", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 1, "typeofword"),
new kelondroColumn("z", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, 4, "flags"),
new kelondroColumn("c", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 1, "hitcount"),
new kelondroColumn("t", kelondroColumn.celltype_cardinal, kelondroColumn.encoder_b256, 2, "posintext"),
@ -132,7 +132,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
this.entry.setCol(col_lother, outlinksOther);
this.entry.setCol(col_urlLength, urlLength);
this.entry.setCol(col_urlComps, urlComps);
this.entry.setCol(col_typeofword, 0); // TODO: grammatical classification
this.entry.setCol(col_typeofword, new byte[]{(byte) 0}); // TODO: grammatical classification
this.entry.setCol(col_flags, null); // TODO: generate flags
this.entry.setCol(col_hitcount, hitcount);
this.entry.setCol(col_posintext, posintext);
@ -159,7 +159,7 @@ public class indexRWIEntryNew implements Cloneable, indexRWIEntry {
int domlen = plasmaURL.domLengthEstimation(oldEntry.urlHash());
this.entry.setCol(col_urlLength, domlen * 2); // estimated
this.entry.setCol(col_urlComps, domlen / 3); // estimated
this.entry.setCol(col_typeofword, 0);
this.entry.setCol(col_typeofword, new byte[]{(byte) 0});
this.entry.setCol(col_flags, null);
this.entry.setCol(col_hitcount, oldEntry.hitcount());
this.entry.setCol(col_posintext, oldEntry.posintext());

View File

@ -160,7 +160,13 @@ public class indexURLEntryOld implements indexURLEntry {
this.snippet = prop.getProperty("snippet", "");
if (snippet.length() == 0) snippet = null;
else snippet = crypt.simpleDecode(snippet, null);
this.word = (prop.containsKey("word")) ? new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", ""))) : null;
this.word = null;
if (prop.containsKey("word")) {
this.word = new indexRWIEntryOld(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("word", "")));
}
if (prop.containsKey("wi")) {
this.word = new indexRWIEntryNew(kelondroBase64Order.enhancedCoder.decodeString(prop.getProperty("wi", "")));
}
} catch (Exception e) {
serverLog.logSevere("PLASMA",
"INTERNAL ERROR in plasmaLURL.entry/2:"

View File

@ -298,9 +298,9 @@ public class kelondroBase64Order extends kelondroAbstractOrder implements kelond
bc = b[boffset + i];
assert (bc >= 0) && (bc < 128) : "bc = " + bc + ", b = " + serverLog.arrayList(b, boffset, len);
acc = ahpla[ac];
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + serverLog.table(a, 16, aoffset);
assert (acc >= 0) : "acc = " + acc + ", a = " + serverLog.arrayList(a, aoffset, len) + "/" + new String(a, aoffset, len) + ", aoffset = 0x" + Integer.toHexString(aoffset) + ", i = " + i + "\n" + serverLog.table(a, 16, aoffset);
bcc = ahpla[bc];
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + serverLog.table(b, 16, boffset);
assert (bcc >= 0) : "bcc = " + bcc + ", b = " + serverLog.arrayList(b, boffset, len) + "/" + new String(b, boffset, len) + ", boffset = 0x" + Integer.toHexString(boffset) + ", i = " + i + "\n" + serverLog.table(b, 16, boffset);
if (acc > bcc) return 1;
if (acc < bcc) return -1;
// else the bytes are equal and it may go on yet undecided

View File

@ -130,7 +130,7 @@ public class kelondroRowCollection {
}
public static final int exportOverheadSize = 14;
public byte[] exportCollection() {
// returns null if the collection is empty
trim();
@ -147,7 +147,7 @@ public class kelondroRowCollection {
entry.setCol(exp_collection, chunkcache);
return entry.bytes();
}
public kelondroRow row() {
return this.rowdef;
}
@ -155,7 +155,7 @@ public class kelondroRowCollection {
private final void ensureSize(int elements) {
int needed = elements * rowdef.objectsize();
if (chunkcache.length >= needed) return;
byte[] newChunkcache = new byte[needed * 12 / 10]; // increase space by 20%
byte[] newChunkcache = new byte[needed * 2]; // increase space
System.arraycopy(chunkcache, 0, newChunkcache, 0, chunkcache.length);
chunkcache = newChunkcache;
newChunkcache = null;
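
The ensureSize change above swaps 20% headroom (needed * 12 / 10) for plain doubling (needed * 2): more memory per reallocation, but geometric growth makes a long run of single-row appends amortized O(1) instead of reallocating on almost every call. Both strategies side by side, as a sketch:

class GrowthSketch {
    static byte[] growBy20Percent(byte[] cache, int needed) {
        if (cache.length >= needed) return cache;
        byte[] grown = new byte[needed * 12 / 10]; // old strategy
        System.arraycopy(cache, 0, grown, 0, cache.length);
        return grown;
    }

    static byte[] growByDoubling(byte[] cache, int needed) {
        if (cache.length >= needed) return cache;
        byte[] grown = new byte[needed * 2];       // new strategy
        System.arraycopy(cache, 0, grown, 0, cache.length);
        return grown;
    }
}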
@ -441,7 +441,7 @@ public class kelondroRowCollection {
protected final int swap(int i, int j, int p) {
if (i == j) return p;
if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) {
if ((this.chunkcount + 1) * this.rowdef.objectsize() < this.chunkcache.length) {
// there is space in the chunkcache that we can use as buffer
System.arraycopy(chunkcache, this.rowdef.objectsize() * i, chunkcache, chunkcache.length - this.rowdef.objectsize(), this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());

View File

@ -175,8 +175,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
public void shape() {
//System.out.println("SHAPE");
if (this.sortOrder == null) return; // we cannot shape without an object order
assert (this.sortOrder != null); // we cannot shape without an object order
synchronized (chunkcache) {
resolveMarkedRemoved();
super.sort();
@ -246,7 +245,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
private int find(byte[] a, int astart, int alength) {
// returns the chunknumber; -1 if not found
if (this.sortOrder == null) return iterativeSearch(a, astart, alength);
if (this.sortOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
// check if a re-sorting make sense
if ((this.chunkcount - this.sortBound) > collectionReSortLimit) shape();
@ -256,20 +255,20 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
if (p >= 0) return p;
// then find in unsorted area
return iterativeSearch(a, astart, alength);
return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
}
private int iterativeSearch(byte[] key, int astart, int alength) {
private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) {
// returns the chunknumber
if (this.sortOrder == null) {
for (int i = this.sortBound; i < this.chunkcount; i++) {
for (int i = leftBorder; i < rightBound; i++) {
if (match(key, astart, alength, i)) return i;
}
return -1;
} else {
for (int i = this.sortBound; i < this.chunkcount; i++) {
for (int i = leftBorder; i < rightBound; i++) {
if (compare(key, astart, alength, i) == 0) return i;
}
return -1;
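
find above now passes explicit borders into iterativeSearch: the full range [0, chunkcount) when there is no sort order at all, and only the unsorted tail [sortBound, chunkcount) after a binary search over the sorted part has failed. A simplified sketch of the bounded linear scan, with a plain byte[][] standing in for the chunk cache:

import java.util.Arrays;

class ScanSketch {
    // returns the chunk number within [leftBorder, rightBound), or -1 if absent
    static int iterativeSearch(byte[][] rows, byte[] key, int leftBorder, int rightBound) {
        for (int i = leftBorder; i < rightBound; i++) {
            if (Arrays.equals(rows[i], key)) return i;
        }
        return -1;
    }
}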
@ -322,7 +321,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
private boolean match(byte[] a, int astart, int alength, int chunknumber) {
if (chunknumber >= chunkcount) return false;
int i = 0;
int p = chunknumber * this.rowdef.objectsize();
int p = chunknumber * this.rowdef.objectsize() + this.rowdef.colstart[this.sortColumn];
final int len = Math.min(this.rowdef.width(this.sortColumn), Math.min(alength, a.length - astart));
while (i < len) if (a[astart + i++] != chunkcache[p++]) return false;
return ((len == this.rowdef.width(this.sortColumn)) || (chunkcache[len] == 0)) ;

View File

@ -93,7 +93,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.ranking = ranking;
this.urlStore = urlStore;
this.snippetCache = snippetCache;
this.rcContainers = new indexContainer(null, wordIndex.payloadrow());
this.rcContainers = wordIndex.emptyContainer(null);
this.rcContainerFlushCount = 0;
this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
@ -139,7 +139,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
long secondaryTimeout = System.currentTimeMillis() + profileGlobal.duetime() / 3 * 2;
long primaryTimeout = System.currentTimeMillis() + profileGlobal.duetime();
primarySearchThreads = yacySearch.primaryRemoteSearches(plasmaSearchQuery.hashSet2hashString(query.queryHashes), "",
query.prefer, query.urlMask, query.maxDistance, urlStore, rcContainers, rcAbstracts,
query.prefer, query.urlMask, query.maxDistance, urlStore, wordIndex, rcContainers, rcAbstracts,
fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal, ranking);
// meanwhile do a local search
@ -280,7 +280,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
secondarySearchThreads[c++] = yacySearch.secondaryRemoteSearch(
words, urls, urlStore, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
words, urls, urlStore, wordIndex, rcContainers, peer, plasmaSwitchboard.urlBlacklist, snippetCache,
profileGlobal, ranking);
}
@ -357,7 +357,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
assert (rcLocal != null);
indexContainer searchResult = new indexContainer(null, rcLocal.row());
indexContainer searchResult = wordIndex.emptyContainer(null);
long preorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.startTimer();

View File

@ -134,7 +134,6 @@ import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroException;
@ -237,6 +236,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public dbImportManager dbImportManager;
public plasmaDHTFlush transferIdxThread = null;
private plasmaDHTChunk dhtTransferChunk = null;
private boolean newIndex;
/*
* Remote Proxy configuration
@ -434,8 +434,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
ramNURL, getConfigBool("useFlexTableForNURL", false),
ramEURL, getConfigBool("useFlexTableForEURL", true),
ramLURL_time);
newIndex = getConfigBool("useCollectionIndex", false);
try {
wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
wordIndex = new plasmaWordIndex(plasmaPath, indexPath, true, ramRWI, ramRWI_time, log, newIndex);
} catch (IOException e1) {
e1.printStackTrace();
System.exit(-1);
@ -1672,28 +1673,28 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String word = (String) wentry.getKey();
wordStat = (plasmaCondenser.wordStatProp) wentry.getValue();
String wordHash = plasmaURL.word2hash(word);
indexRWIEntry wordIdxEntry = new indexRWIEntryOld(
urlHash,
urlLength, urlComps,
wordStat.count,
document.getMainLongTitle().length(),
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
wordStat.posInText,
wordStat.posInPhrase,
wordStat.numOfPhrase,
0,
newEntry.size(),
docDate.getTime(),
System.currentTimeMillis(),
condenser.RESULT_WORD_ENTROPHY,
language,
doctype,
ioLinks[0].intValue(),
ioLinks[1].intValue(),
true
);
indexContainer wordIdxContainer = new indexContainer(wordHash, wordIndex.payloadrow());
indexRWIEntry wordIdxEntry = wordIndex.newRWIEntry(
urlHash,
urlLength, urlComps,
wordStat.count,
document.getMainLongTitle().length(),
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
wordStat.posInText,
wordStat.posInPhrase,
wordStat.numOfPhrase,
0,
newEntry.size(),
docDate.getTime(),
System.currentTimeMillis(),
condenser.RESULT_WORD_ENTROPHY,
language,
doctype,
ioLinks[0].intValue(),
ioLinks[1].intValue(),
true
);
indexContainer wordIdxContainer = wordIndex.emptyContainer(wordHash);
wordIdxContainer.add(wordIdxEntry);
tmpContainers.add(wordIdxContainer);
}

View File

@ -43,6 +43,7 @@ import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRAMRI;
import de.anomic.index.indexRI;
import de.anomic.index.indexRWIEntryNew;
import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
@ -60,7 +61,8 @@ public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
private static final kelondroRow payloadrow = indexRWIEntryOld.urlEntryRow;
private static final kelondroRow payloadrowold = indexRWIEntryOld.urlEntryRow;
private static final kelondroRow payloadrownew = indexRWIEntryNew.urlEntryRow;
private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
@ -75,27 +77,31 @@ public final class plasmaWordIndex implements indexRI {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException {
this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrow, log);
this.dhtOutCache = new indexRAMRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log);
this.dhtInCache = new indexRAMRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log);
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrowold, log);
File textindexcache = new File(newIndexRoot, "PUBLIC/TEXT/RICACHE");
if (!(textindexcache.exists())) textindexcache.mkdirs();
if (useCollectionIndex) {
this.dhtOutCache = new indexRAMRI(textindexcache, payloadrownew, 1024, "dump1.array", log, true);
this.dhtInCache = new indexRAMRI(textindexcache, payloadrownew, 1024, "dump2.array", log, true);
} else {
this.dhtOutCache = new indexRAMRI(oldDatabaseRoot, payloadrowold, 64, "indexDump1.array", log, false);
this.dhtInCache = new indexRAMRI(oldDatabaseRoot, payloadrowold, 64, "indexDump2.array", log, false);
}
// create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
this.assortmentBufferSize = bufferkb;
// create collections storage path
File textindexpath = new File(newIndexRoot, "PUBLIC/TEXT");
if (!(textindexpath.exists())) textindexpath.mkdirs();
File textindexcollections = new File(newIndexRoot, "PUBLIC/TEXT/RICOLLECTION");
if (!(textindexcollections.exists())) textindexcollections.mkdirs();
if (useCollectionIndex) {
this.collections = new indexCollectionRI(textindexpath, "test_generation1", bufferkb * 1024, preloadTime, payloadrow);
if (assortmentClusterPath.exists())
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log);
else
this.assortmentCluster = null;
this.collections = new indexCollectionRI(textindexcollections, "collection", bufferkb * 1024, preloadTime, payloadrownew);
this.assortmentCluster = null;
} else {
this.collections = null;
if (!(assortmentClusterPath.exists())) assortmentClusterPath.mkdirs();
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log);
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrowold, assortmentBufferSize, preloadTime, log);
}
busyCacheFlush = false;
@ -105,7 +111,38 @@ public final class plasmaWordIndex implements indexRI {
}
public kelondroRow payloadrow() {
return payloadrow;
if (useCollectionIndex) return payloadrownew; else return payloadrowold;
}
public indexRWIEntry newRWIEntry(
String urlHash,
int urlLength,
int urlComps,
int titleLength,
int hitcount,
int wordcount,
int phrasecount,
int posintext,
int posinphrase,
int posofphrase,
int worddistance,
int sizeOfPage,
long lastmodified,
long updatetime,
int quality,
String language,
char doctype,
int outlinksSame,
int outlinksOther,
boolean local ) {
if (useCollectionIndex)
return new indexRWIEntryNew(urlHash, urlLength, urlComps, titleLength, hitcount, wordcount, phrasecount,
posintext, posinphrase, posofphrase, worddistance, sizeOfPage, lastmodified, updatetime, quality, language, doctype,
outlinksSame, outlinksOther, local);
else
return new indexRWIEntryOld(urlHash, urlLength, urlComps, titleLength, hitcount, wordcount, phrasecount,
posintext, posinphrase, posofphrase, worddistance, sizeOfPage, lastmodified, updatetime, quality, language, doctype,
outlinksSame, outlinksOther, local);
}
public File getRoot() {
@ -198,10 +235,12 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer emptyContainer(String wordHash) {
return new indexContainer(wordHash, payloadrow);
return new indexContainer(wordHash, payloadrow(), useCollectionIndex);
}
public indexContainer addEntry(String wordHash, indexRWIEntry entry, long updateTime, boolean dhtInCase) {
if ((useCollectionIndex) && (entry instanceof indexRWIEntryOld)) entry = new indexRWIEntryNew((indexRWIEntryOld) entry);
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(wordHash))) dhtInCase = true;
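
emptyContainer above is the factory that the rest of this commit funnels callers through (plasmaSearchEvent, plasmaSwitchboard and yacyClient all stop constructing indexContainer directly), so the payload row and the newRWI flag are decided in exactly one place. A sketch of that centralization, with simplified stand-in classes:

class ContainerSketch {
    final String wordHash;
    final boolean newRWI;

    ContainerSketch(String wordHash, boolean newRWI) {
        this.wordHash = wordHash;
        this.newRWI = newRWI;
    }
}

class WordIndexSketch {
    private final boolean useCollectionIndex;

    WordIndexSketch(boolean useCollectionIndex) {
        this.useCollectionIndex = useCollectionIndex;
    }

    // callers ask the index for containers instead of newing them up,
    // so a format switch stays local to this class
    ContainerSketch emptyContainer(String wordHash) {
        return new ContainerSketch(wordHash, this.useCollectionIndex);
    }
}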
@ -215,7 +254,21 @@ public final class plasmaWordIndex implements indexRI {
return null;
}
private indexContainer convertOld2New(indexContainer entries) {
// convert old entries to new entries
indexContainer newentries = new indexContainer(entries.getWordHash(), payloadrownew, useCollectionIndex);
Iterator i = entries.entries();
indexRWIEntryOld old;
while (i.hasNext()) {
old = (indexRWIEntryOld) i.next();
newentries.add(new indexRWIEntryNew(old));
}
return newentries;
}
public indexContainer addEntries(indexContainer entries, long updateTime, boolean dhtInCase) {
if ((useCollectionIndex) && (entries.row().objectsize() == payloadrowold.objectsize())) entries = convertOld2New(entries);
// set dhtInCase depending on wordHash
if ((!dhtInCase) && (yacyDHTAction.shallBeOwnWord(entries.getWordHash()))) dhtInCase = true;
@ -330,23 +383,23 @@ public final class plasmaWordIndex implements indexRI {
wprop = (plasmaCondenser.wordStatProp) wentry.getValue();
// if ((s.length() > 4) && (c > 1)) System.out.println("# " + s + ":" + c);
wordHash = plasmaURL.word2hash(word);
ientry = new indexRWIEntryOld(urlHash,
urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(),
wprop.count,
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
wprop.posInText,
wprop.posInPhrase,
wprop.numOfPhrase,
0,
size,
urlModified.getTime(),
System.currentTimeMillis(),
condenser.RESULT_WORD_ENTROPHY,
language,
doctype,
outlinksSame, outlinksOther,
true);
ientry = newRWIEntry(urlHash,
urlLength, urlComps, (document == null) ? urlLength : document.getMainLongTitle().length(),
wprop.count,
condenser.RESULT_SIMI_WORDS,
condenser.RESULT_SIMI_SENTENCES,
wprop.posInText,
wprop.posInPhrase,
wprop.numOfPhrase,
0,
size,
urlModified.getTime(),
System.currentTimeMillis(),
condenser.RESULT_WORD_ENTROPHY,
language,
doctype,
outlinksSame, outlinksOther,
true);
addEntry(wordHash, ientry, System.currentTimeMillis(), false);
}
// System.out.println("DEBUG: plasmaSearch.addPageIndex: added " +
@ -357,23 +410,22 @@ public final class plasmaWordIndex implements indexRI {
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
long start = System.currentTimeMillis();
// get from cache
indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
// get from cache
indexContainer container = dhtOutCache.getContainer(wordHash, urlselection, true, -1);
if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
} else {
container.add(dhtInCache.getContainer(wordHash, urlselection, true, -1), -1);
}
// get from collection index
if (useCollectionIndex) {
if (container == null) {
container = dhtInCache.getContainer(wordHash, urlselection, true, -1);
container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(dhtInCache.getContainer(wordHash, urlselection, true, -1), -1);
container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
}
// get from collection index
if (useCollectionIndex) {
if (container == null) {
container = collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(collections.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
}
}
} else {
// get from assortments
if (assortmentCluster != null) {
if (container == null) {
@ -383,7 +435,7 @@ public final class plasmaWordIndex implements indexRI {
container.add(assortmentCluster.getContainer(wordHash, urlselection, true, (maxTime < 0) ? -1 : maxTime), -1);
}
}
// get from backend
if (maxTime > 0) {
maxTime = maxTime - (System.currentTimeMillis() - start);
@ -394,7 +446,8 @@ public final class plasmaWordIndex implements indexRI {
} else {
container.add(backend.getContainer(wordHash, urlselection, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime), -1);
}
return container;
}
return container;
}
public Map getContainers(Set wordHashes, Set urlselection, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
@ -429,10 +482,7 @@ public final class plasmaWordIndex implements indexRI {
public int size() {
if (useCollectionIndex)
return java.lang.Math.max(collections.size(),
java.lang.Math.max((assortmentCluster == null) ? 0 : assortmentCluster.size(),
java.lang.Math.max(backend.size(),
java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()))));
return java.lang.Math.max(collections.size(), java.lang.Math.max(dhtInCache.size(), dhtOutCache.size()));
else
return java.lang.Math.max((assortmentCluster == null) ? 0 : assortmentCluster.size(),
java.lang.Math.max(backend.size(),
@ -441,17 +491,18 @@ public final class plasmaWordIndex implements indexRI {
public int indexSize(String wordHash) {
int size = 0;
try {
plasmaWordIndexFile entity = backend.getEntity(wordHash, true, -1);
if (entity != null) {
size += entity.size();
entity.close();
}
} catch (IOException e) {}
if (useCollectionIndex) size += collections.indexSize(wordHash);
size += dhtInCache.indexSize(wordHash);
size += dhtOutCache.indexSize(wordHash);
if (useCollectionIndex) {
size += collections.indexSize(wordHash);
} else try {
size += (assortmentCluster == null) ? 0 : assortmentCluster.indexSize(wordHash);
size += dhtInCache.indexSize(wordHash);
size += dhtOutCache.indexSize(wordHash);
plasmaWordIndexFile entity = backend.getEntity(wordHash, true, -1);
if (entity != null) {
size += entity.size();
entity.close();
}
} catch (IOException e) {}
return size;
}
@ -459,44 +510,51 @@ public final class plasmaWordIndex implements indexRI {
synchronized (this) {
dhtInCache.close(waitingBoundSeconds);
dhtOutCache.close(waitingBoundSeconds);
if (useCollectionIndex) collections.close(-1);
if (assortmentCluster != null) assortmentCluster.close(-1);
backend.close(10);
if (useCollectionIndex) {
collections.close(-1);
} else {
if (assortmentCluster != null) assortmentCluster.close(-1);
backend.close(10);
}
}
}
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash, payloadrow);
c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1);
indexContainer c = new indexContainer(wordHash, payloadrow(), useCollectionIndex);
c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
if (useCollectionIndex) {
c.add(collections.deleteContainer(wordHash), -1);
} else {
if (assortmentCluster != null) c.add(assortmentCluster.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1);
return c;
}
return c;
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
boolean removed = false;
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
if (useCollectionIndex) {removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));}
removed = removed | (dhtInCache.removeEntry(wordHash, urlHash, deleteComplete));
removed = removed | (dhtOutCache.removeEntry(wordHash, urlHash, deleteComplete));
if (useCollectionIndex) {
removed = removed | (collections.removeEntry(wordHash, urlHash, deleteComplete));
} else {
if (assortmentCluster != null) removed = removed | (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete));
removed = removed | backend.removeEntry(wordHash, urlHash, deleteComplete);
return removed;
}
return removed;
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
int removed = 0;
removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete);
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete);
//if (removed == urlHashes.size()) return removed;
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
//if (removed == urlHashes.size()) return removed;
} else if (assortmentCluster != null) {
removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
if (assortmentCluster != null) removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
//if (removed == urlHashes.size()) return removed;
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
return removed;
}
@ -505,10 +563,11 @@ public final class plasmaWordIndex implements indexRI {
removed += dhtInCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
removed += dhtOutCache.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
} else removed += "0, ";
if (assortmentCluster != null) removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
} else {
if (assortmentCluster != null) removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete) + ", ";
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
return removed;
}
@ -522,7 +581,7 @@ public final class plasmaWordIndex implements indexRI {
// urlHash assigned. This can only work if the entry is really fresh
// and can be found in the RAM cache
// this returns the number of deletion that had been possible
return dhtInCache.tryRemoveURLs(urlHash);
return dhtInCache.tryRemoveURLs(urlHash) | dhtOutCache.tryRemoveURLs(urlHash);
}
public TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) throws IOException {
@ -562,28 +621,15 @@ public final class plasmaWordIndex implements indexRI {
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) {
return dhtOutCache.wordContainers(startWordHash, false);
}
if ((resourceLevel == plasmaWordIndex.RL_COLLECTIONS) && (useCollectionIndex)) {
if (useCollectionIndex) {
return new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true);
}
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
if (useCollectionIndex) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true),
(assortmentCluster == null) ? null : assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true);
} else {
} else {
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
return new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
(assortmentCluster == null) ? null : assortmentCluster.wordContainers(startWordHash, true, false),
@ -591,26 +637,7 @@ public final class plasmaWordIndex implements indexRI {
indexContainer.containerMergeMethod,
true);
}
}
if (resourceLevel == plasmaWordIndex.RL_WORDFILES) {
if (useCollectionIndex) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true),
(assortmentCluster == null) ? null : assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true),
backend.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexContainer.containerMergeMethod,
true);
} else {
if (resourceLevel == plasmaWordIndex.RL_WORDFILES) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
dhtOutCache.wordContainers(startWordHash, false),
@ -719,7 +746,7 @@ public final class plasmaWordIndex implements indexRI {
try {
entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
int size = entity.size();
indexContainer container = new indexContainer(wordhash, payloadrow);
indexContainer container = new indexContainer(wordhash, payloadrow(), useCollectionIndex);
try {
Iterator entries = entity.elements(true);

View File

@ -218,7 +218,7 @@ public final class plasmaWordIndexAssortment {
if (row == null) return null;
String wordHash = row.getColString(0, null);
final long updateTime = row.getColLong(2);
indexContainer container = new indexContainer(wordHash, payloadrow);
indexContainer container = new indexContainer(wordHash, payloadrow, false);
int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) {
container.add(new indexRWIEntry[] { new indexRWIEntryOld(row.getColBytes(3 + i)) }, updateTime);

View File

@ -165,7 +165,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
indexContainer c;
Iterator i = newContainer.entries();
for (int j = clusterStart; j >= 1; j--) {
c = new indexContainer(newContainer.getWordHash(), payloadrow);
c = new indexContainer(newContainer.getWordHash(), payloadrow, false);
for (int k = 0; k < j; k++) {
if (i.hasNext()) {
c.add((indexRWIEntry) i.next(), newContainer.updated());
@ -179,7 +179,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
}
public indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, payloadrow);
indexContainer container = new indexContainer(wordHash, payloadrow, false);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
@ -220,7 +220,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
Iterator i = newContainer.entries();
for (int j = testsize - 1; j >= 0; j--) {
if (spaces[j] == 0) continue;
c = new indexContainer(newContainer.getWordHash(), payloadrow);
c = new indexContainer(newContainer.getWordHash(), payloadrow, false);
for (int k = 0; k <= j; k++) {
assert (i.hasNext());
c.add((indexRWIEntry) i.next(), newContainer.updated());
@ -258,7 +258,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
public indexContainer deleteContainer(String wordHash, long maxTime) {
// removes all records from all the assortments and return them
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow, false);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {
@ -283,7 +283,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
*/
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow, false);
boolean found = false;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -299,7 +299,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow, false);
int initialSize = urlHashes.size();
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -324,7 +324,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
// collect all records from all the assortments and return them
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow, false);
long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].get(wordHash);

View File

@ -234,7 +234,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
if (exists(wordHash)) {
plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
indexContainer container = new indexContainer(wordHash, payloadrow);
indexContainer container = new indexContainer(wordHash, payloadrow, false);
indexRWIEntry entry;
Iterator i = entity.elements(true);
while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) {
@ -243,7 +243,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
}
return container;
} else {
return new indexContainer(wordHash, payloadrow);
return new indexContainer(wordHash, payloadrow, false);
}
}
@ -258,7 +258,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
public indexContainer deleteContainer(String wordHash) {
plasmaWordIndexFile.removePlasmaIndex(databaseRoot, wordHash);
return new indexContainer(wordHash, payloadrow);
return new indexContainer(wordHash, payloadrow, false);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
@ -303,7 +303,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
}
public indexContainer addEntry(String wordHash, indexRWIEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash, payloadrow);
indexContainer container = new indexContainer(wordHash, payloadrow, false);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}

View File

@ -57,7 +57,6 @@ import de.anomic.http.httpc;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.plasma.plasmaURL;
import de.anomic.index.indexRWIEntryOld;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
@ -66,6 +65,7 @@ import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCodings;
@ -373,6 +373,7 @@ public final class yacyClient {
boolean global,
yacySeed targetPeer,
plasmaCrawlLURL urlManager,
plasmaWordIndex wordIndex,
indexContainer containerCache,
Map abstractCache,
plasmaURLPattern blacklist,
@ -493,7 +494,7 @@ public final class yacyClient {
final int words = wordhashes.length() / yacySeedDB.commonHashLength;
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
container[i] = new indexContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength), indexRWIEntryOld.urlEntryRow);
container[i] = wordIndex.emptyContainer(wordhashes.substring(i * yacySeedDB.commonHashLength, (i + 1) * yacySeedDB.commonHashLength));
}
// insert results to containers
@ -517,7 +518,7 @@ public final class yacyClient {
int urlLength = comp.url().toNormalform().length();
int urlComps = htmlFilterContentScraper.urlComps(comp.url().toNormalform()).length;
entry = new indexRWIEntryOld(
entry = wordIndex.newRWIEntry(
urlEntry.hash(),
urlLength,
urlComps,

View File

@ -57,6 +57,7 @@ import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;
@ -65,6 +66,7 @@ public class yacySearch extends Thread {
final private String wordhashes, urlhashes;
final private boolean global;
final private plasmaCrawlLURL urlManager;
final private plasmaWordIndex wordIndex;
final private indexContainer containerCache;
final private Map abstractCache;
final private plasmaURLPattern blacklist;
@ -77,7 +79,7 @@ public class yacySearch extends Thread {
final private String prefer, filter;
public yacySearch(String wordhashes, String urlhashes, String prefer, String filter, int maxDistance,
boolean global, yacySeed targetPeer, plasmaCrawlLURL urlManager,
boolean global, yacySeed targetPeer, plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache, Map abstractCache,
plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
@ -89,6 +91,7 @@ public class yacySearch extends Thread {
this.filter = filter;
this.global = global;
this.urlManager = urlManager;
this.wordIndex = wordIndex;
this.containerCache = containerCache;
this.abstractCache = abstractCache;
this.blacklist = blacklist;
@ -101,7 +104,7 @@ public class yacySearch extends Thread {
}
public void run() {
this.urls = yacyClient.search(wordhashes, urlhashes, prefer, filter, maxDistance, global, targetPeer, urlManager, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
this.urls = yacyClient.search(wordhashes, urlhashes, prefer, filter, maxDistance, global, targetPeer, urlManager, wordIndex, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
if (urls != null) {
StringBuffer urllist = new StringBuffer(this.urls.length * 13);
for (int i = 0; i < this.urls.length; i++) urllist.append(this.urls[i]).append(' ');
@ -194,7 +197,8 @@ public class yacySearch extends Thread {
return result;
}
public static yacySearch[] primaryRemoteSearches(String wordhashes, String urlhashes, String prefer, String filter, int maxDist, plasmaCrawlLURL urlManager,
public static yacySearch[] primaryRemoteSearches(String wordhashes, String urlhashes, String prefer, String filter, int maxDist,
plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache, Map abstractCache,
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
@ -210,14 +214,16 @@ public class yacySearch extends Thread {
yacySearch[] searchThreads = new yacySearch[targets];
for (int i = 0; i < targets; i++) {
searchThreads[i]= new yacySearch(wordhashes, urlhashes, prefer, filter, maxDist, true, targetPeers[i],
urlManager, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
urlManager, wordIndex, containerCache, abstractCache, blacklist, snippetCache, timingProfile, rankingProfile);
searchThreads[i].start();
//try {Thread.sleep(20);} catch (InterruptedException e) {}
}
return searchThreads;
}
public static yacySearch secondaryRemoteSearch(String wordhashes, String urlhashes, plasmaCrawlLURL urlManager, indexContainer containerCache,
public static yacySearch secondaryRemoteSearch(String wordhashes, String urlhashes,
plasmaCrawlLURL urlManager, plasmaWordIndex wordIndex,
indexContainer containerCache,
String targethash, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache,
plasmaSearchTimingProfile timingProfile, plasmaSearchRankingProfile rankingProfile) {
// check own peer status
@ -228,7 +234,7 @@ public class yacySearch extends Thread {
final yacySeed targetPeer = yacyCore.seedDB.getConnected(targethash);
if (targetPeer == null) return null;
yacySearch searchThread = new yacySearch(wordhashes, urlhashes, "", "", 9999, true, targetPeer,
urlManager, containerCache, new TreeMap(), blacklist, snippetCache, timingProfile, rankingProfile);
urlManager, wordIndex, containerCache, new TreeMap(), blacklist, snippetCache, timingProfile, rankingProfile);
searchThread.start();
return searchThread;
}