preparations for collection index cache flush optimization

(hand-over commit, no functional change to current code)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3399 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2007-02-25 21:06:26 +00:00
parent 42e9747650
commit 8668ac5d91
9 changed files with 323 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import javax.imageio.ImageIO;
@ -25,6 +26,7 @@ import de.anomic.kelondro.kelondroProfile;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroSplittedTree;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.server.serverInstantThread;
import de.anomic.server.serverMemory;
import de.anomic.ymage.ymageChart;
@ -524,6 +526,11 @@ final class dbTable implements kelondroIndex {
}
}
// Stores all rows of the given list, one after the other in list order,
// by handing each element to put(row, entryDate).
// Every list element is cast to Entry; entryDate is passed on unchanged.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    for (Iterator rowIterator = rows.iterator(); rowIterator.hasNext();) {
        final Entry nextRow = (Entry) rowIterator.next();
        put(nextRow, entryDate);
    }
}
// Date-carrying variant of put: entryDate is not evaluated here,
// the row is passed on to put(row) and that call's result is returned.
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
    final kelondroRow.Entry result = put(row);
    return result;
}

View File

@ -30,6 +30,7 @@ package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.server.serverMemory;
@ -339,6 +340,11 @@ public class kelondroCache implements kelondroIndex {
}
}
// Writes a whole list of rows through this cache:
// each element is cast to Entry and given to put(row, entryDate),
// in the order in which the list's iterator delivers them.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    final Iterator pending = rows.iterator();
    while (pending.hasNext()) {
        final Entry current = (Entry) pending.next();
        put(current, entryDate);
    }
}
public synchronized Entry put(Entry row) throws IOException {
assert (row != null);
assert (row.columns() == row().columns());

View File

@ -414,6 +414,251 @@ public class kelondroCollectionIndex {
return removed;
}
// Stores a collection for which no index entry exists yet.
// The exported collection is appended to the array file selected by
// arrayIndex(collection.size()), and a fresh index row describing that
// location is filled in and returned.
//
// key        - key of the collection
// collection - the collection to store; must not be null
// indexrow   - ignored; a new row is always created. The parameter is kept
//              only so that the parameter list stays unchanged.
// returns    - the filled index row. The row is NOT written to the index here;
//              the caller must call index.addUnique(<returned row>) afterwards.
//
// Java passes references by value, so assigning to the indexrow parameter is
// invisible to the caller; the filled row is therefore handed back as the
// return value.
private kelondroRow.Entry putnew(byte[] key, kelondroRowCollection collection, kelondroRow.Entry indexrow) throws IOException {
    // the collection is new
    int newPartitionNumber = arrayIndex(collection.size());
    indexrow = index.row().newEntry();
    kelondroFixedWidthArray array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize());

    // define row
    kelondroRow.Entry arrayEntry = array.row().newEntry();
    arrayEntry.setCol(0, key);
    arrayEntry.setCol(1, collection.exportCollection());

    // write a new entry in this array
    int newRowNumber = array.add(arrayEntry);

    // store the new row number in the index
    indexrow.setCol(idx_col_key, key);
    indexrow.setCol(idx_col_chunksize, this.payloadrow.objectsize());
    indexrow.setCol(idx_col_chunkcount, collection.size());
    indexrow.setCol(idx_col_clusteridx, (byte) newPartitionNumber);
    indexrow.setCol(idx_col_flags, (byte) 0);
    indexrow.setCol(idx_col_indexpos, (long) newRowNumber);
    indexrow.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
    indexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));

    // after calling this method there must be an index.addUnique(<returned row>);
    return indexrow;
}

// Overwrites an already stored collection in place (same partition, same row).
// The array entry at rownumber is replaced and the chunk count, cluster index
// and last-write day of the given index row are updated.
//
// indexrow is modified but not written back;
// after calling this method there must be an index.put(indexrow).
private void putreplace(
        byte[] key, kelondroRowCollection collection, kelondroRow.Entry indexrow,
        int serialNumber, int chunkSize,
        int partitionNumber, int rownumber) throws IOException {
    // we don't need a new slot, just write into the old one

    // find array file
    kelondroFixedWidthArray array = getArray(partitionNumber, serialNumber, chunkSize);

    // define row
    kelondroRow.Entry arrayEntry = array.row().newEntry();
    arrayEntry.setCol(0, key);
    arrayEntry.setCol(1, collection.exportCollection());

    // overwrite entry in this array
    array.set(rownumber, arrayEntry);

    // update the index entry
    indexrow.setCol(idx_col_chunkcount, collection.size());
    indexrow.setCol(idx_col_clusteridx, (byte) partitionNumber);
    indexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));

    // after calling this method there must be an index.put(indexrow);
}

// Moves a stored collection into another partition.
// The old array entry is removed, the collection is appended to the array of
// the new partition, and the chunk count, cluster index, row position and
// last-write day of the given index row are updated.
//
// indexrow is modified but not written back;
// after calling this method there must be an index.put(indexrow).
private void puttransit(
        byte[] key, kelondroRowCollection collection, kelondroRow.Entry indexrow,
        int serialNumber, int chunkSize,
        int oldPartitionNumber, int oldRownumber,
        int newPartitionNumber) throws IOException {
    // we need a new slot, that means we must first delete the old entry

    // find array file
    kelondroFixedWidthArray array = getArray(oldPartitionNumber, serialNumber, chunkSize);

    // delete old entry
    array.remove(oldRownumber);

    // write a new entry in the other array
    array = getArray(newPartitionNumber, 0, this.payloadrow.objectsize());

    // define row
    kelondroRow.Entry arrayEntry = array.row().newEntry();
    arrayEntry.setCol(0, key);
    arrayEntry.setCol(1, collection.exportCollection());

    // write a new entry in this array
    int newRowNumber = array.add(arrayEntry);

    // store the new row number in the index
    indexrow.setCol(idx_col_chunkcount, collection.size());
    indexrow.setCol(idx_col_clusteridx, (byte) newPartitionNumber);
    indexrow.setCol(idx_col_indexpos, (long) newRowNumber);
    indexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));

    // after calling this method there must be an index.put(indexrow);
}

// Stores the given collection under key, replacing whatever was stored before.
//  - no index entry yet: a non-empty collection is written as a new entry,
//    a null or empty collection is ignored
//  - index entry exists and the collection is empty: array entry and index
//    entry are removed
//  - otherwise the stored collection is overwritten in place, or moved when
//    the new size maps to another partition
// collection must not be null when an index entry for key already exists.
private void put1(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException {
    // first find an old entry, if one exists
    kelondroRow.Entry indexrow = index.get(key);

    if (indexrow == null) {
        if ((collection != null) && (collection.size() > 0)) {
            // putnew creates and fills the index row; it must be taken from the return value
            indexrow = putnew(key, collection, indexrow);
            index.addUnique(indexrow);
        }
        return;
    }

    // overwrite the old collection
    // read old information
    int oldchunksize       = (int) indexrow.getColLong(idx_col_chunksize);  // needed only for migration
    int oldchunkcount      = (int) indexrow.getColLong(idx_col_chunkcount);
    int oldrownumber       = (int) indexrow.getColLong(idx_col_indexpos);
    int oldPartitionNumber = (int) indexrow.getColByte(idx_col_clusteridx);
    assert (oldPartitionNumber >= arrayIndex(oldchunkcount));
    int oldSerialNumber = 0;

    if (collection.size() == 0) {
        // delete the index entry and the array
        kelondroFixedWidthArray array = getArray(oldPartitionNumber, oldSerialNumber, oldchunksize);
        array.remove(oldrownumber);
        index.remove(key);
        return;
    }

    int newPartitionNumber = arrayIndex(collection.size());

    // see if we need new space or if we can overwrite the old space
    // NOTE(review): the delete branch above opens the array with oldchunksize,
    // while the calls below pass this.payloadrow.objectsize() - confirm both always agree
    if (oldPartitionNumber == newPartitionNumber) {
        putreplace(
                key, collection, indexrow,
                oldSerialNumber, this.payloadrow.objectsize(),
                oldPartitionNumber, oldrownumber);
    } else {
        puttransit(
                key, collection, indexrow,
                oldSerialNumber, this.payloadrow.objectsize(),
                oldPartitionNumber, oldrownumber,
                newPartitionNumber);
    }
    index.put(indexrow); // write modified indexrow
}

// Adds the rows of the given collection to the collection stored under key.
// A null or empty collection is ignored. When no index entry exists yet the
// collection is written as a new entry; otherwise the stored collection is
// loaded, joined with the new rows, made unique and written back, in place or
// into another partition depending on the resulting size.
private void merge1(byte[] key, kelondroRowCollection collection) throws IOException, kelondroOutOfLimitsException {
    //if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());

    if ((collection == null) || (collection.size() == 0)) return;

    // first find an old entry, if one exists
    kelondroRow.Entry indexrow = index.get(key);

    if (indexrow == null) {
        // collection is known to be non-null and non-empty at this point;
        // putnew creates and fills the index row, it must be taken from the return value
        indexrow = putnew(key, collection, indexrow);
        index.addUnique(indexrow);
        return;
    }

    // merge with the old collection
    // read old information
    int oldchunksize       = (int) indexrow.getColLong(idx_col_chunksize);  // needed only for migration
    int oldchunkcount      = (int) indexrow.getColLong(idx_col_chunkcount);
    int oldrownumber       = (int) indexrow.getColLong(idx_col_indexpos);
    int oldPartitionNumber = (int) indexrow.getColByte(idx_col_clusteridx);
    assert (oldPartitionNumber >= arrayIndex(oldchunkcount));
    int oldSerialNumber = 0;

    // load the old collection and join it
    kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false);

    // join with new collection
    oldcollection.addAllUnique(collection);
    oldcollection.shape();
    oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
    oldcollection.trim();
    collection = oldcollection;

    int newPartitionNumber = arrayIndex(collection.size());

    // see if we need new space or if we can overwrite the old space
    if (oldPartitionNumber == newPartitionNumber) {
        putreplace(
                key, collection, indexrow,
                oldSerialNumber, this.payloadrow.objectsize(),
                oldPartitionNumber, oldrownumber);
    } else {
        puttransit(
                key, collection, indexrow,
                oldSerialNumber, this.payloadrow.objectsize(),
                oldPartitionNumber, oldrownumber,
                newPartitionNumber);
    }
    index.put(indexrow); // write modified indexrow
}
// Deletes the given keys from the collection stored under key and reports
// how many of them were actually removed.
// Elements of removekeys may be byte[] or String (converted with getBytes());
// elements of any other type are skipped.
// When the collection becomes empty, its array entry and index entry are
// removed; otherwise the shrunk collection is written back, in place or into
// another partition depending on its new size.
// Returns 0 when no collection is indexed under key.
private int remove1(byte[] key, Set removekeys) throws IOException, kelondroOutOfLimitsException {
    //if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());

    // nothing to do when there is no index entry for this key
    kelondroRow.Entry indexrow = index.get(key);
    if (indexrow == null) return 0;

    // location of the stored collection, as recorded in the index row
    final int storedChunkSize  = (int) indexrow.getColLong(idx_col_chunksize); // needed only for migration
    final int storedChunkCount = (int) indexrow.getColLong(idx_col_chunkcount);
    final int storedRowNumber  = (int) indexrow.getColLong(idx_col_indexpos);
    final int storedPartition  = (int) indexrow.getColByte(idx_col_clusteridx);
    assert (storedPartition >= arrayIndex(storedChunkCount));
    final int serial = 0;

    int removed = 0;
    assert (removekeys != null);

    // load the stored collection
    kelondroRowSet stored = getwithparams(indexrow, storedChunkSize, storedChunkCount, storedPartition, storedRowNumber, serial, false);

    // take the keys out of the loaded set and count the hits
    for (Iterator keyIterator = removekeys.iterator(); keyIterator.hasNext();) {
        final Object candidate = keyIterator.next();
        if (candidate instanceof byte[]) {
            if (stored.remove((byte[]) candidate) != null) removed++;
        } else if (candidate instanceof String) {
            if (stored.remove(((String) candidate).getBytes()) != null) removed++;
        }
    }
    stored.shape();
    stored.trim();

    if (stored.size() == 0) {
        // nothing left: drop the array entry and the index entry
        getArray(storedPartition, serial, storedChunkSize).remove(storedRowNumber);
        index.remove(key);
        return removed;
    }

    // decide between moving to another partition and overwriting in place
    final int targetPartition = arrayIndex(stored.size());
    if (storedPartition != targetPartition) {
        puttransit(
                key, stored, indexrow,
                serial, this.payloadrow.objectsize(),
                storedPartition, storedRowNumber,
                targetPartition);
    } else {
        putreplace(
                key, stored, indexrow,
                serial, this.payloadrow.objectsize(),
                storedPartition, storedRowNumber);
    }
    index.put(indexrow); // write modified indexrow
    return removed;
}
public synchronized int indexSize(byte[] key) throws IOException {
kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return 0;

View File

@ -32,6 +32,7 @@ import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class kelondroFlexSplitTable implements kelondroIndex {
@ -185,6 +186,10 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return (kelondroRow.Entry) keeper[1];
}
// Stores all rows of the given list by calling put(row, entryDate) for each
// element, in list order. Every list element is cast to kelondroRow.Entry.
// This is the plain row-by-row form; it does not reorder the writes.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    Iterator i = rows.iterator();
    while (i.hasNext()) put((kelondroRow.Entry) i.next(), entryDate);
}
// Stores a row without an explicit date: a Date object created at call time
// is used as the entry date for put(row, entryDate).
public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException {
    final Date now = new Date(); // entry for current date
    return put(row, now);
}

View File

@ -29,6 +29,9 @@ import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.server.logging.serverLog;
@ -179,6 +182,33 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
// smaller than an index to a row entry
return super.get(i);
}
// Stores a list of rows in two passes so that overwrites happen in an ordered way;
// this should save R/W head positioning time.
//  pass 1: rows whose key (column 0) is not in the index are appended right away
//          and their new position is registered in the index; rows whose key is
//          already known are collected, keyed by their existing position
//  pass 2: the collected rows are written to their positions in ascending order
// If several rows map to the same position, the last one in the list is written.
// entryDate is not evaluated in this method.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    // position -> row; a TreeMap iterates its keys in ascending order
    final TreeMap overwrites = new TreeMap();

    for (Iterator rowIterator = rows.iterator(); rowIterator.hasNext();) {
        final kelondroRow.Entry candidate = (kelondroRow.Entry) rowIterator.next();
        final byte[] rowKey = candidate.getColBytes(0);
        final int knownPos = index.geti(rowKey);
        if (knownPos >= 0) {
            overwrites.put(new Integer(knownPos), candidate);
            continue;
        }
        index.puti(rowKey, super.add(candidate));
    }

    for (Iterator slotIterator = overwrites.entrySet().iterator(); slotIterator.hasNext();) {
        final Map.Entry slot = (Map.Entry) slotIterator.next();
        final int targetPos = ((Integer) slot.getKey()).intValue();
        super.set(targetPos, (kelondroRow.Entry) slot.getValue());
    }
}
public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
return put(row);

View File

@ -53,6 +53,7 @@ package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
public interface kelondroIndex {
@ -63,6 +64,7 @@ public interface kelondroIndex {
public kelondroRow.Entry get(byte[] key) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
// bulk variant of put(row, entryDate): takes a list of kelondroRow.Entry objects
// and one entryDate that applies to the whole list, so that an implementation
// gets the chance to arrange the single write operations itself
public void putMultiple(List /* of kelondroRow.Entry*/ rows, Date entryDate) throws IOException; // for R/W head path optimization
public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check
public void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException; // no double-check
public kelondroRow.Entry remove(byte[] key) throws IOException;

View File

@ -27,9 +27,11 @@ package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.server.logging.serverLog;
public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex {
@ -100,6 +102,11 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
}
}
// Puts every element of the given list into this set:
// each element is cast to Entry and passed to put(row, entryDate) in list order.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    for (Iterator rowIterator = rows.iterator(); rowIterator.hasNext();) {
        final Entry element = (Entry) rowIterator.next();
        put(element, entryDate);
    }
}
// Variant of put that accepts an entry date; the date is not evaluated,
// the call is forwarded to put(row) and its result is returned.
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) {
    final kelondroRow.Entry result = put(row);
    return result;
}

View File

@ -47,8 +47,10 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
public class kelondroSplittedTree implements kelondroIndex {
@ -113,6 +115,18 @@ public class kelondroSplittedTree implements kelondroIndex {
return ktfs[partition(key)].get(key);
}
// Distributes a list of rows over the sub-trees.
// Each row is sorted into a bucket chosen by partition(key), where key is
// column 0 of the row; afterwards every sub-tree receives its bucket through
// its own putMultiple, including sub-trees whose bucket stayed empty.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    // one bucket per sub-tree
    final ArrayList[] buckets = new ArrayList[ktfs.length];
    for (int p = 0; p < buckets.length; p++) {
        buckets[p] = new ArrayList();
    }

    // sort the rows into their buckets
    for (Iterator rowIterator = rows.iterator(); rowIterator.hasNext();) {
        final kelondroRow.Entry current = (kelondroRow.Entry) rowIterator.next();
        final int target = partition(current.getColBytes(0));
        buckets[target].add(current);
    }

    // hand every bucket to its sub-tree
    for (int p = 0; p < buckets.length; p++) {
        ktfs[p].putMultiple(buckets[p], entryDate);
    }
}
// Variant of put that accepts an entry date; the date is not evaluated,
// the call is forwarded to put(row) and its result is returned.
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
    final kelondroRow.Entry result = put(row);
    return result;
}

View File

@ -54,6 +54,7 @@ import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;
@ -61,6 +62,7 @@ import java.util.TreeSet;
import java.util.Vector;
import java.util.logging.Logger;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.server.logging.serverLog;
public class kelondroTree extends kelondroRecords implements kelondroIndex {
@ -326,6 +328,11 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
return (lc.equals(childn.handle()));
}
// Inserts all rows of the given list into the tree, one at a time:
// each element is cast to Entry and passed to put(row, entryDate) in list order.
public synchronized void putMultiple(List rows, Date entryDate) throws IOException {
    final Iterator remaining = rows.iterator();
    while (remaining.hasNext()) {
        final Entry current = (Entry) remaining.next();
        put(current, entryDate);
    }
}
// Variant of put that accepts an entry date; the date is not evaluated,
// the call is forwarded to put(row) and its result is returned.
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException {
    final kelondroRow.Entry result = put(row);
    return result;
}