generalization of payload definition of index storage

This is one step forward in the migration to a new collection data format.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2912 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-11-05 02:10:40 +00:00
parent 29a1f132ec
commit 8fdefd5c68
18 changed files with 150 additions and 99 deletions

View File

@ -207,10 +207,15 @@ public final class search {
// join and order the result
indexContainer localResults = theSearch.localSearchJoin(containers.values());
joincount = localResults.size();
prop.put("joincount", Integer.toString(joincount));
acc = theSearch.orderFinal(localResults);
if (localResults == null) {
joincount = 0;
prop.put("joincount", 0);
acc = null;
} else {
joincount = localResults.size();
prop.put("joincount", Integer.toString(joincount));
acc = theSearch.orderFinal(localResults);
}
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific
// urls, because it is a re-search

View File

@ -32,8 +32,8 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCollectionIndex;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOutOfLimitsException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowCollection;
@ -44,18 +44,17 @@ public class indexCollectionRI implements indexRI {
kelondroCollectionIndex collectionIndex;
public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime) {
kelondroRow rowdef = indexURLEntry.urlEntryRow;
public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime, kelondroRow payloadrow) {
try {
collectionIndex = new kelondroCollectionIndex(
path,
filenameStub,
12 /*keyLength*/,
kelondroNaturalOrder.naturalOrder,
kelondroBase64Order.enhancedCoder,
buffersize,
preloadTime,
4 /*loadfactor*/,
rowdef);
payloadrow);
} catch (IOException e) {
serverLog.logSevere("PLASMA", "unable to open collection index at " + path.toString() + ":" + e.getMessage());
}
@ -154,7 +153,7 @@ public class indexCollectionRI implements indexRI {
}
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
indexContainer container = new indexContainer(wordHash, collectionIndex.payloadRow());
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}

View File

@ -34,7 +34,6 @@ import java.util.Set;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
@ -43,8 +42,13 @@ public class indexContainer extends kelondroRowSet {
private String wordHash;
public indexContainer(String wordHash) {
this(wordHash, new kelondroNaturalOrder(true), 0);
public indexContainer(String wordHash, kelondroRow rowdef, int objectCount, byte[] cache) {
super(rowdef, objectCount, cache, kelondroBase64Order.enhancedCoder, 0, 0);
this.wordHash = wordHash;
}
public indexContainer(String wordHash, kelondroRow rowdef) {
this(wordHash, rowdef, kelondroBase64Order.enhancedCoder, 0);
}
public indexContainer(String wordHash, kelondroRowSet collection) {
@ -52,15 +56,15 @@ public class indexContainer extends kelondroRowSet {
this.wordHash = wordHash;
}
public indexContainer(String wordHash, kelondroOrder ordering, int column) {
super(indexURLEntry.urlEntryRow);
public indexContainer(String wordHash, kelondroRow rowdef, kelondroOrder ordering, int column) {
super(rowdef);
this.wordHash = wordHash;
this.lastTimeWrote = 0;
this.setOrdering(ordering, column);
}
public indexContainer topLevelClone() {
indexContainer newContainer = new indexContainer(this.wordHash, this.sortOrder, this.sortColumn);
indexContainer newContainer = new indexContainer(this.wordHash, this.rowdef, this.sortOrder, this.sortColumn);
newContainer.add(this, -1);
return newContainer;
}
@ -220,7 +224,7 @@ public class indexContainer extends kelondroRowSet {
singleContainer = (indexContainer) i.next();
// check result
if ((singleContainer == null) || (singleContainer.size() == 0)) return new indexContainer(null); // as this is a cunjunction of searches, we have no result if any word is not known
if ((singleContainer == null) || (singleContainer.size() == 0)) return null; // as this is a cunjunction of searches, we have no result if any word is not known
// store result in order of result size
map.put(new Long(singleContainer.size() * 1000 + count), singleContainer);
@ -228,7 +232,7 @@ public class indexContainer extends kelondroRowSet {
}
// check if there is any result
if (map.size() == 0) return new indexContainer(null); // no result, nothing found
if (map.size() == 0) return null; // no result, nothing found
// the map now holds the search results in order of number of hits per word
// we now must pairwise build up a conjunction of these sets
@ -247,7 +251,7 @@ public class indexContainer extends kelondroRowSet {
}
// in 'searchResult' is now the combined search result
if (searchResult.size() == 0) return new indexContainer(null);
if (searchResult.size() == 0) return null;
return searchResult;
}
@ -260,7 +264,7 @@ public class indexContainer extends kelondroRowSet {
public static indexContainer joinConstructive(indexContainer i1, indexContainer i2, long time, int maxDistance) {
if ((i1 == null) || (i2 == null)) return null;
if ((i1.size() == 0) || (i2.size() == 0)) return new indexContainer(null);
if ((i1.size() == 0) || (i2.size() == 0)) return null;
// decide which method to use
int high = ((i1.size() > i2.size()) ? i1.size() : i2.size());
@ -281,7 +285,8 @@ public class indexContainer extends kelondroRowSet {
private static indexContainer joinConstructiveByTest(indexContainer small, indexContainer large, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY TEST");
indexContainer conj = new indexContainer(null); // start with empty search result
assert small.rowdef.equals(large);
indexContainer conj = new indexContainer(null, small.rowdef); // start with empty search result
Iterator se = small.entries();
indexEntry ie0, ie1;
long stamp = System.currentTimeMillis();
@ -299,7 +304,8 @@ public class indexContainer extends kelondroRowSet {
private static indexContainer joinConstructiveByEnumeration(indexContainer i1, indexContainer i2, long time, int maxDistance) {
System.out.println("DEBUG: JOIN METHOD BY ENUMERATION");
indexContainer conj = new indexContainer(null); // start with empty search result
assert i1.rowdef.equals(i2);
indexContainer conj = new indexContainer(null, i1.rowdef); // start with empty search result
if (!((i1.order().signature().equals(i2.order().signature())) &&
(i1.primarykey() == i2.primarykey()))) return conj; // ordering must be equal
Iterator e1 = i1.entries();

View File

@ -45,11 +45,6 @@ public final class indexRAMCacheRI implements indexRI {
// environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final kelondroRow bufferStructureBasis = new kelondroRow(
"byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " +
"Cardinal occ-4 {b256}, " +
"Cardinal time-8 {b256}, " +
"byte[] urlprops-" + indexURLEntry.urlEntryRow.objectsize());
// class variables
private final File databaseRoot;
@ -61,6 +56,8 @@ public final class indexRAMCacheRI implements indexRI {
public int cacheReferenceLimit;
private final serverLog log;
private String indexArrayFileName;
private kelondroRow payloadrow;
private kelondroRow bufferStructureBasis;
// calculated constants
private static String maxKey;
@ -69,7 +66,7 @@ public final class indexRAMCacheRI implements indexRI {
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
public indexRAMCacheRI(File databaseRoot, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
public indexRAMCacheRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
@ -81,7 +78,13 @@ public final class indexRAMCacheRI implements indexRI {
this.cacheMaxCount = 10000;
this.cacheReferenceLimit = wCacheReferenceLimitInit;
this.log = log;
indexArrayFileName = dumpname;
this.indexArrayFileName = dumpname;
this.payloadrow = payloadrow;
this.bufferStructureBasis = new kelondroRow(
"byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " +
"Cardinal occ-4 {b256}, " +
"Cardinal time-8 {b256}, " +
"byte[] urlprops-" + payloadrow.objectsize());
// read in dump of last session
try {
@ -91,6 +94,7 @@ public final class indexRAMCacheRI implements indexRI {
}
}
public synchronized long getUpdateTime(String wordHash) {
indexContainer entries = getContainer(wordHash, null, false, -1);
if (entries == null) return 0;
@ -423,7 +427,7 @@ public final class indexRAMCacheRI implements indexRI {
// put container into wCache
String wordHash = container.getWordHash();
indexContainer entries = (indexContainer) cache.get(wordHash); // null pointer exception? wordhash != null! must be cache==null
if (entries == null) entries = new indexContainer(wordHash);
if (entries == null) entries = new indexContainer(wordHash, container.row());
added = entries.add(container, -1);
if (added > 0) {
cache.put(wordHash, entries);
@ -436,7 +440,7 @@ public final class indexRAMCacheRI implements indexRI {
public synchronized indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = (indexContainer) cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash);
if (container == null) container = new indexContainer(wordHash, this.payloadrow);
indexEntry[] entries = new indexEntry[] { newEntry };
if (container.add(entries, updateTime) > 0) {
cache.put(wordHash, container);

View File

@ -43,7 +43,7 @@ public class kelondroCollectionIndex {
private String filenameStub;
private int loadfactor;
private Map arrays; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects
private kelondroRow playloadrow; // definition of the payload (chunks inside the collections)
private kelondroRow payloadrow; // definition of the payload (chunks inside the collections)
// private int partitions; // this is the maxmimum number of array files; yet not used
private static final int idx_col_key = 0; // the index
@ -68,6 +68,10 @@ public class kelondroCollectionIndex {
);
}
public kelondroRow payloadRow() {
    // Accessor for the payload row definition, i.e. the layout of the
    // chunks stored inside the collections managed by this index.
    return payloadrow;
}
private static String fillZ(String s, int len) {
while (s.length() < len) s = "0" + s;
return s;
@ -94,7 +98,7 @@ public class kelondroCollectionIndex {
this.path = path;
this.filenameStub = filenameStub;
this.keylength = keyLength;
this.playloadrow = rowdef;
this.payloadrow = rowdef;
this.loadfactor = loadfactor;
boolean ramIndexGeneration = false;
@ -151,7 +155,7 @@ public class kelondroCollectionIndex {
key = aentry.getColBytes(0);
assert (key != null);
if (key == null) continue; // skip deleted entries
kelondroRowSet indexrows = new kelondroRowSet(this.playloadrow, aentry.getColBytes(1));
kelondroRowSet indexrows = new kelondroRowSet(this.payloadrow, aentry.getColBytes(1));
ientry = irow.newEntry();
ientry.setCol(idx_col_key, key);
ientry.setCol(idx_col_chunksize, chunksize);
@ -199,11 +203,11 @@ public class kelondroCollectionIndex {
}
private kelondroFixedWidthArray openArrayFile(int partitionNumber, int serialNumber, boolean create) throws IOException {
File f = arrayFile(path, filenameStub, loadfactor, playloadrow.objectsize(), partitionNumber, serialNumber);
File f = arrayFile(path, filenameStub, loadfactor, payloadrow.objectsize(), partitionNumber, serialNumber);
int load = arrayCapacity(partitionNumber);
kelondroRow rowdef = new kelondroRow(
"byte[] key-" + keylength + "," +
"byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.playloadrow.objectsize())
"byte[] collection-" + (kelondroRowCollection.exportOverheadSize + load * this.payloadrow.objectsize())
);
if ((!(f.exists())) && (!create)) return null;
kelondroFixedWidthArray a = new kelondroFixedWidthArray(f, rowdef, 0);
@ -270,7 +274,7 @@ public class kelondroCollectionIndex {
// the collection is new
int newPartitionNumber = arrayIndex(collection.size());
indexrow = index.row().newEntry();
kelondroFixedWidthArray array = getArray(newPartitionNumber, 0, this.playloadrow.objectsize());
kelondroFixedWidthArray array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
@ -282,7 +286,7 @@ public class kelondroCollectionIndex {
// store the new row number in the index
indexrow.setCol(idx_col_key, key);
indexrow.setCol(idx_col_chunksize, this.playloadrow.objectsize());
indexrow.setCol(idx_col_chunksize, this.payloadrow.objectsize());
indexrow.setCol(idx_col_chunkcount, collection.size());
indexrow.setCol(idx_col_clusteridx, (byte) newPartitionNumber);
indexrow.setCol(idx_col_flags, (byte) 0);
@ -348,7 +352,7 @@ public class kelondroCollectionIndex {
// we don't need a new slot, just write into the old one
// find array file
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.playloadrow.objectsize());
kelondroFixedWidthArray array = getArray(newPartitionNumber, newSerialNumber, this.payloadrow.objectsize());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
@ -372,7 +376,7 @@ public class kelondroCollectionIndex {
array.remove(oldrownumber);
// write a new entry in the other array
array = getArray(newPartitionNumber, 0, this.playloadrow.objectsize());
array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize());
// define row
kelondroRow.Entry arrayEntry = array.row().newEntry();
@ -438,7 +442,7 @@ public class kelondroCollectionIndex {
if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, clusteridx, serialnumber).toString(), "array does not contain expected row");
// read the row and define a collection
kelondroRowSet collection = new kelondroRowSet(this.playloadrow, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
kelondroRowSet collection = new kelondroRowSet(this.payloadrow, arrayrow.getColBytes(1)); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
byte[] key = indexrow.getColBytes(idx_col_key);
if (index.order().compare(arrayrow.getColBytes(0), key) != 0) {
// check if we got the right row; this row is wrong. Fix it:
@ -446,7 +450,7 @@ public class kelondroCollectionIndex {
// store the row number in the index; this may be a double-entry, but better than nothing
kelondroRow.Entry indexEntry = index.row().newEntry();
indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0));
indexEntry.setCol(idx_col_chunksize, this.playloadrow.objectsize());
indexEntry.setCol(idx_col_chunksize, this.payloadrow.objectsize());
indexEntry.setCol(idx_col_chunkcount, collection.size());
indexEntry.setCol(idx_col_clusteridx, (byte) clusteridx);
indexEntry.setCol(idx_col_flags, (byte) 0);

View File

@ -470,4 +470,12 @@ public class kelondroRow {
return true;
}
public boolean equals(kelondroRow otherRow) {
    // Structural equality of two row definitions: same total object size and
    // pairwise-equal column definitions.
    // NOTE(review): this OVERLOADS equals with a kelondroRow parameter; it does
    // not override Object.equals(Object), so collections and polymorphic calls
    // will not dispatch here — confirm this is intentional.
    if (otherRow == null) return false;
    if (this.objectsize != otherRow.objectsize) return false;
    // Equal total size does not imply equal column count; without this guard the
    // original loop over otherRow.row could overrun this.row.
    if (this.row.length != otherRow.row.length) return false;
    for (int i = 0; i < this.row.length; i++) {
        if (!(this.row[i].equals(otherRow.row[i]))) return false;
    }
    return true;
}
}

View File

@ -417,13 +417,13 @@ public class kelondroRowCollection {
if (this.chunkcount * this.rowdef.objectsize() < this.chunkcache.length) {
// there is space in the chunkcache that we can use as buffer
System.arraycopy(chunkcache, this.rowdef.objectsize() * i, chunkcache, chunkcache.length - this.rowdef.objectsize(), this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() *j , chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());
System.arraycopy(chunkcache, chunkcache.length - this.rowdef.objectsize(), chunkcache, this.rowdef.objectsize() * j, this.rowdef.objectsize());
} else {
// allocate a chunk to use as buffer
byte[] a = new byte[this.rowdef.objectsize()];
System.arraycopy(chunkcache, this.rowdef.objectsize() * i, a, 0, this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() * j , chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());
System.arraycopy(chunkcache, this.rowdef.objectsize() * j, chunkcache, this.rowdef.objectsize() * i, this.rowdef.objectsize());
System.arraycopy(a, 0, chunkcache, this.rowdef.objectsize() * j, this.rowdef.objectsize());
}
if (i == p) return j; else if (j == p) return i; else return p;

View File

@ -37,12 +37,18 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
private kelondroProfile profile;
private TreeSet removeMarker;
// Constructs a row set over an already-serialized chunk cache.
// rowdef      - layout definition of a single row
// objectCount - number of rows already present in cache
// cache       - backing byte array holding the serialized rows
// sortOrder, sortColumn, sortBound - ordering state of the cached rows
public kelondroRowSet(kelondroRow rowdef, int objectCount, byte[] cache, kelondroOrder sortOrder, int sortColumn, int sortBound) {
    super(rowdef, objectCount, cache, sortOrder, sortColumn, sortBound);
    this.removeMarker = new TreeSet();
    this.profile = new kelondroProfile();
}
// Copy constructor. Note: this is a SHALLOW copy — the profile and the
// remove-marker set are shared with the source row set, not cloned.
public kelondroRowSet(kelondroRowSet rs) {
    super(rs);
    this.profile = rs.profile;
    this.removeMarker = rs.removeMarker;
}
public kelondroRowSet(kelondroRow rowdef) {
super(rowdef, 0);
this.removeMarker = new TreeSet();

View File

@ -5,6 +5,7 @@ import java.io.IOException;
import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndexAssortment;
@ -62,7 +63,7 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{
// initializing the import assortment db
this.log.logInfo("Initializing source assortment file");
try {
this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr, this.cacheSize/1024, preloadTime, this.log);
this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, indexURLEntry.urlEntryRow, assortmentNr, this.cacheSize/1024, preloadTime, this.log);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);

View File

@ -43,7 +43,6 @@
package de.anomic.plasma.parser.swf;
import java.io.File;
import java.io.InputStream;
import de.anomic.net.URL;
import java.util.Hashtable;

View File

@ -91,7 +91,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
this.ranking = ranking;
this.urlStore = urlStore;
this.snippetCache = snippetCache;
this.rcContainers = new indexContainer(null);
this.rcContainers = new indexContainer(null, wordIndex.payloadrow());
this.rcContainerFlushCount = 0;
this.rcAbstracts = (query.size() > 1) ? new TreeMap() : null; // generate abstracts only for combined searches
this.profileLocal = localTiming;
@ -195,12 +195,14 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// combine the result and order
plasmaSearchResult result = orderFinal(rcLocal);
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
// flush results in a separate thread
this.start(); // start to flush results
if (result != null) {
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
// flush results in a separate thread
this.start(); // start to flush results
}
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
lastEvent = this;
@ -209,7 +211,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
Map searchContainerMap = localSearchContainers(null);
indexContainer rcLocal = localSearchJoin((searchContainerMap == null) ? null : searchContainerMap.values());
plasmaSearchResult result = orderFinal(rcLocal);
result.localContributions = rcLocal.size();
result.localContributions = (rcLocal == null) ? 0 : rcLocal.size();
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
@ -333,9 +335,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// join a search result and return the joincount (number of pages after join)
// since this is a conjunction we return an empty entity if any word is not known
if (containers == null) {
return new indexContainer(null);
}
if (containers == null) return null;
// join the result
profileLocal.startTimer();
@ -352,7 +352,8 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
indexContainer searchResult = new indexContainer(null);
if (rcLocal == null) return null;
indexContainer searchResult = new indexContainer(null, rcLocal.row());
long preorderTime = profileLocal.getTargetTime(plasmaSearchTimingProfile.PROCESS_PRESORT);
profileLocal.startTimer();
@ -416,6 +417,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
private void prefetchLocal(indexContainer rcLocal, long timeout) {
// pre-fetch some urls to fill LURL ram cache
if (rcLocal == null) return;
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, rcLocal, timeout - System.currentTimeMillis());
preorder.remove(true, true);

View File

@ -1662,7 +1662,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String word = (String) wentry.getKey();
wordStat = (plasmaCondenser.wordStatProp) wentry.getValue();
String wordHash = indexEntryAttribute.word2hash(word);
indexContainer wordIdxContainer = new indexContainer(wordHash);
indexEntry wordIdxEntry = new indexURLEntry(
urlHash,
urlLength, urlComps,
@ -1684,6 +1683,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
ioLinks[1].intValue(),
true
);
indexContainer wordIdxContainer = new indexContainer(wordHash, wordIndex.payloadrow());
wordIdxContainer.add(wordIdxEntry);
tmpContainers.add(wordIdxContainer);
}

View File

@ -50,6 +50,7 @@ import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;
@ -59,6 +60,7 @@ public final class plasmaWordIndex implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
private static final kelondroRow payloadrow = indexURLEntry.urlEntryRow;
private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
@ -73,9 +75,9 @@ public final class plasmaWordIndex implements indexRI {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException {
this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log);
this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log);
this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log);
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrow, log);
this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log);
this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log);
// create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
@ -85,15 +87,15 @@ public final class plasmaWordIndex implements indexRI {
File textindexpath = new File(newIndexRoot, "PUBLIC/TEXT");
if (!(textindexpath.exists())) textindexpath.mkdirs();
if (useCollectionIndex) {
this.collections = new indexCollectionRI(textindexpath, "test_generation1", bufferkb * 1024, preloadTime);
this.collections = new indexCollectionRI(textindexpath, "test_generation1", bufferkb * 1024, preloadTime, payloadrow);
if (assortmentClusterPath.exists())
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, preloadTime, log);
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log);
else
this.assortmentCluster = null;
} else {
this.collections = null;
if (!(assortmentClusterPath.exists())) assortmentClusterPath.mkdirs();
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, preloadTime, log);
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, payloadrow, assortmentBufferSize, preloadTime, log);
}
busyCacheFlush = false;
@ -102,6 +104,10 @@ public final class plasmaWordIndex implements indexRI {
this.idleDivisor = 420;
}
public kelondroRow payloadrow() {
return payloadrow;
}
public File getRoot() {
return oldDatabaseRoot;
}
@ -459,7 +465,7 @@ public final class plasmaWordIndex implements indexRI {
}
public indexContainer deleteContainer(String wordHash) {
indexContainer c = new indexContainer(wordHash);
indexContainer c = new indexContainer(wordHash, payloadrow);
c.add(dhtInCache.deleteContainer(wordHash), -1);
c.add(dhtOutCache.deleteContainer(wordHash), -1);
if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1);
@ -712,7 +718,7 @@ public final class plasmaWordIndex implements indexRI {
try {
entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
int size = entity.size();
indexContainer container = new indexContainer(wordhash);
indexContainer container = new indexContainer(wordhash, payloadrow);
try {
Iterator entries = entity.elements(true);

View File

@ -58,7 +58,7 @@ import java.util.Iterator;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRAMCacheRI;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroColumn;
@ -79,6 +79,7 @@ public final class plasmaWordIndexAssortment {
private kelondroCache assortments;
private long bufferSize;
private long preloadTime;
private kelondroRow payloadrow;
private static String intx(int x) {
String s = Integer.toString(x);
@ -86,23 +87,23 @@ public final class plasmaWordIndexAssortment {
return s;
}
private static kelondroRow bufferStructure(int assortmentCapacity) {
private kelondroRow bufferStructure(int assortmentCapacity) {
kelondroColumn[] structure = new kelondroColumn[3 + assortmentCapacity];
structure[0] = indexRAMCacheRI.bufferStructureBasis.column(0);
structure[1] = indexRAMCacheRI.bufferStructureBasis.column(1);
structure[2] = indexRAMCacheRI.bufferStructureBasis.column(2);
for (int i = 0; i < assortmentCapacity; i++) {
structure[3 + i] = indexRAMCacheRI.bufferStructureBasis.column(3);
}
structure[0] = new kelondroColumn("byte[] wordhash-" + indexEntryAttribute.wordHashLength);
structure[1] = new kelondroColumn("Cardinal occ-4 {b256}");
structure[2] = new kelondroColumn("Cardinal time-8 {b256}");
kelondroColumn p = new kelondroColumn("byte[] urlprops-" + payloadrow.objectsize());
for (int i = 0; i < assortmentCapacity; i++) structure[3 + i] = p;
return new kelondroRow(structure);
}
private static int assortmentCapacity(int rowsize) {
return (rowsize - indexRAMCacheRI.bufferStructureBasis.width(0) - indexRAMCacheRI.bufferStructureBasis.width(1) - indexRAMCacheRI.bufferStructureBasis.width(2)) / indexRAMCacheRI.bufferStructureBasis.width(3);
private int assortmentCapacity(int rowsize) {
return (rowsize - indexEntryAttribute.wordHashLength - 12) / payloadrow.objectsize();
}
public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException {
public plasmaWordIndexAssortment(File storagePath, kelondroRow payloadrow, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException {
if (!(storagePath.exists())) storagePath.mkdirs();
this.payloadrow = payloadrow;
this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db");
this.assortmentLength = assortmentLength;
//this.bufferStructureLength = 3 + 2 * assortmentLength;
@ -119,6 +120,7 @@ public final class plasmaWordIndexAssortment {
preloadTime + " ms preloadTime, " +
(stop - start) + " ms effective, " +
assortments.cacheNodeStatus()[1] + " preloaded");
}
public void store(indexContainer newContainer) throws IOException {
@ -212,11 +214,11 @@ public final class plasmaWordIndexAssortment {
return row2container(row);
}
public final static indexContainer row2container(kelondroRow.Entry row) {
public final indexContainer row2container(kelondroRow.Entry row) {
if (row == null) return null;
String wordHash = row.getColString(0, null);
final long updateTime = row.getColLong(2);
indexContainer container = new indexContainer(wordHash);
indexContainer container = new indexContainer(wordHash, payloadrow);
int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) {
container.add(new indexEntry[] { new indexURLEntry(row.getColBytes(3 + i)) }, updateTime);

View File

@ -60,6 +60,7 @@ import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroMergeIterator;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroRow;
import de.anomic.server.logging.serverLog;
public final class plasmaWordIndexAssortmentCluster implements indexRI {
@ -71,10 +72,12 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
//private serverLog log;
private plasmaWordIndexAssortment[] assortments;
private long completeBufferKB;
private kelondroRow payloadrow;
public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, int bufferkb, long preloadTime, serverLog log) throws IOException {
public plasmaWordIndexAssortmentCluster(File assortmentsPath, int clusterCount, kelondroRow payloadrow, int bufferkb, long preloadTime, serverLog log) throws IOException {
// set class variables
if (!(assortmentsPath.exists())) assortmentsPath.mkdirs();
this.payloadrow = payloadrow;
this.clusterCount = clusterCount;
this.clusterCapacity = clusterCount * (clusterCount + 1) / 2;
this.completeBufferKB = bufferkb;
@ -86,7 +89,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
int sumSizes = 1;
plasmaWordIndexAssortment testAssortment;
for (int i = 0; i < clusterCount; i++) {
testAssortment = new plasmaWordIndexAssortment(assortmentsPath, i + 1, 0, 0, null);
testAssortment = new plasmaWordIndexAssortment(assortmentsPath, payloadrow, i + 1, 0, 0, null);
sizes[i] = testAssortment.size() + clusterCount - i;
sumSizes += sizes[i];
testAssortment.close();
@ -102,7 +105,9 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
nextTime = Math.max(0, preloadTime * ((long) sizes[i]) / sS);
startTime = System.currentTimeMillis();
assortments[i] = new plasmaWordIndexAssortment(
assortmentsPath, i + 1,
assortmentsPath,
payloadrow,
i + 1,
(int) (completeBufferKB * (long) sizes[i] / (long) sumSizes),
nextTime,
log);
@ -160,7 +165,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
indexContainer c;
Iterator i = newContainer.entries();
for (int j = clusterStart; j >= 1; j--) {
c = new indexContainer(newContainer.getWordHash());
c = new indexContainer(newContainer.getWordHash(), payloadrow);
for (int k = 0; k < j; k++) {
if (i.hasNext()) {
c.add((indexEntry) i.next(), newContainer.updated());
@ -174,7 +179,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
indexContainer container = new indexContainer(wordHash, payloadrow);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}
@ -215,7 +220,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
Iterator i = newContainer.entries();
for (int j = testsize - 1; j >= 0; j--) {
if (spaces[j] == 0) continue;
c = new indexContainer(newContainer.getWordHash());
c = new indexContainer(newContainer.getWordHash(), payloadrow);
for (int k = 0; k <= j; k++) {
assert (i.hasNext());
c.add((indexEntry) i.next(), newContainer.updated());
@ -253,7 +258,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
public indexContainer deleteContainer(String wordHash, long maxTime) {
// removes all records from all the assortments and return them
indexContainer buffer, record = new indexContainer(wordHash);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
long remainingTime;
for (int i = 0; i < clusterCount; i++) {
@ -278,7 +283,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
*/
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
indexContainer buffer, record = new indexContainer(wordHash);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
boolean found = false;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -294,7 +299,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
}
public int removeEntries(String wordHash, Set urlHashes, boolean deleteComplete) {
indexContainer buffer, record = new indexContainer(wordHash);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
int initialSize = urlHashes.size();
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
@ -319,7 +324,7 @@ public final class plasmaWordIndexAssortmentCluster implements indexRI {
public indexContainer getContainer(String wordHash, Set urlselection, boolean deleteIfEmpty, long maxTime) {
// collect all records from all the assortments and return them
indexContainer buffer, record = new indexContainer(wordHash);
indexContainer buffer, record = new indexContainer(wordHash, payloadrow);
long timeout = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].get(wordHash);

View File

@ -54,6 +54,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexRI;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroRow;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
@ -63,9 +64,11 @@ public class plasmaWordIndexFileCluster implements indexRI {
private final File databaseRoot;
private final serverLog log;
private int size;
private kelondroRow payloadrow;
public plasmaWordIndexFileCluster(File databaseRoot, serverLog log) {
this.databaseRoot = databaseRoot;
public plasmaWordIndexFileCluster(File databaseRoot, kelondroRow payloadrow, serverLog log) {
this.databaseRoot = databaseRoot;
this.payloadrow = payloadrow;
this.log = log;
this.size = 0;
}
@ -231,7 +234,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
if ((maxTime < 0) || (maxTime > 60000)) maxTime=60000; // maximum is one minute
if (exists(wordHash)) {
plasmaWordIndexFile entity = this.getEntity(wordHash, deleteIfEmpty, (maxTime < 0) ? -1 : maxTime * 9 / 10);
indexContainer container = new indexContainer(wordHash);
indexContainer container = new indexContainer(wordHash, payloadrow);
indexEntry entry;
Iterator i = entity.elements(true);
while ((i.hasNext()) && (System.currentTimeMillis() < (start + maxTime))) {
@ -240,7 +243,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
}
return container;
} else {
return new indexContainer(wordHash);
return new indexContainer(wordHash, payloadrow);
}
}
@ -255,7 +258,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
public indexContainer deleteContainer(String wordHash) {
plasmaWordIndexFile.removePlasmaIndex(databaseRoot, wordHash);
return new indexContainer(wordHash);
return new indexContainer(wordHash, payloadrow);
}
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
@ -300,7 +303,7 @@ public class plasmaWordIndexFileCluster implements indexRI {
}
public indexContainer addEntry(String wordHash, indexEntry newEntry, long updateTime, boolean dhtCase) {
indexContainer container = new indexContainer(wordHash);
indexContainer container = new indexContainer(wordHash, payloadrow);
container.add(newEntry);
return addEntries(container, updateTime, dhtCase);
}

View File

@ -494,7 +494,7 @@ public final class yacyClient {
final int words = wordhashes.length() / indexEntryAttribute.wordHashLength;
indexContainer[] container = new indexContainer[words];
for (int i = 0; i < words; i++) {
container[i] = new indexContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength));
container[i] = new indexContainer(wordhashes.substring(i * indexEntryAttribute.wordHashLength, (i + 1) * indexEntryAttribute.wordHashLength), indexURLEntry.urlEntryRow);
}
// insert results to containers

View File

@ -74,6 +74,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMap;
@ -1235,7 +1236,7 @@ public final class yacy {
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, true, 8*1024*1024, 3000, log, sps.getConfigBool("useCollectionIndex", false));
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
} else if (resource.equals("assortments")) {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, 3000, log);
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, indexURLEntry.urlEntryRow, 16*1024*1024, 3000, log);
indexContainerIterator = assortmentCluster.wordContainers(wordChunkStartHash, true, false);
} /*else if (resource.startsWith("assortment")) {
int a = Integer.parseInt(resource.substring(10));