2006-03-01 01:25:02 +01:00
package de.anomic.kelondro ;
2006-08-05 01:04:03 +02:00
// a collectionIndex is an index to kelondroRowCollection objects
2006-03-01 01:25:02 +01:00
// such a collection ist defined by the following parameters
// - chunksize
// - chunkcount
// each of such a collection is stored in a byte[] which may or may not have space for more chunks
// than already exists in such an array. To store these arrays, we reserve entries in kelondroArray
// database files. There will be a set of array files for different sizes of the collection arrays.
// the 1st file has space for <loadfactor> chunks, the 2nd file for <loadfactor> * <loadfactor> chunks,
// the 3rd file for <loadfactor>^^3 chunks, and the n-th file for <loadfactor>^^n chunks.
// if the loadfactor is 4, then we have the following capacities:
// file 0: 4
// file 1: 16
// file 2: 64
// file 3: 256
// file 4: 1024
// file 5: 4096
// file 6:16384
// file 7:65536
// the maximum number of such files is called the partitions number.
// we don't want that these files grow too big, an kelondroOutOfLimitsException is throws if they
// are oversized.
// the collection arrays may be migration to another size during run-time, which means that not only the
// partitions as mentioned above are maintained, but also a set of "shadow-partitions", that represent old
// partitions and where data is read only and slowly migrated to the default partitions.
import java.io.File ;
import java.io.IOException ;
2007-03-03 01:55:51 +01:00
import java.text.SimpleDateFormat ;
2007-02-26 16:49:23 +01:00
import java.util.ArrayList ;
2007-03-03 01:55:51 +01:00
import java.util.Calendar ;
2007-02-26 16:49:23 +01:00
import java.util.Date ;
2007-03-03 01:55:51 +01:00
import java.util.GregorianCalendar ;
2006-08-05 01:04:03 +02:00
import java.util.HashMap ;
import java.util.Iterator ;
2007-02-26 16:49:23 +01:00
import java.util.List ;
2006-08-05 01:04:03 +02:00
import java.util.Map ;
2007-03-03 01:55:51 +01:00
import java.util.Random ;
2006-08-05 21:18:33 +02:00
import java.util.Set ;
2007-03-03 01:55:51 +01:00
import java.util.TimeZone ;
2007-02-28 12:13:23 +01:00
import java.util.TreeMap ;
2006-03-01 01:25:02 +01:00
2007-02-26 16:49:23 +01:00
import de.anomic.index.indexContainer ;
2007-03-03 01:55:51 +01:00
import de.anomic.plasma.plasmaURL ;
import de.anomic.server.serverCodings ;
2006-08-11 05:20:44 +02:00
import de.anomic.server.serverFileUtils ;
2007-03-04 23:54:04 +01:00
import de.anomic.server.serverMemory ;
2006-10-06 01:47:08 +02:00
import de.anomic.server.logging.serverLog ;
2006-08-11 05:20:44 +02:00
2006-03-01 01:25:02 +01:00
public class kelondroCollectionIndex {
2007-02-27 14:01:22 +01:00
private static final int serialNumber = 0 ;
2007-03-03 01:55:51 +01:00
private kelondroIndex index ;
private int keylength ;
2006-08-05 01:04:03 +02:00
private File path ;
private String filenameStub ;
2007-03-03 01:55:51 +01:00
private File commonsPath ;
2006-08-05 01:04:03 +02:00
private int loadfactor ;
private Map arrays ; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects
2006-11-05 03:10:40 +01:00
private kelondroRow payloadrow ; // definition of the payload (chunks inside the collections)
2007-03-03 01:55:51 +01:00
private int maxPartitions ; // this is the maxmimum number of array files; yet not used
2006-08-05 01:04:03 +02:00
private static final int idx_col_key = 0 ; // the index
private static final int idx_col_chunksize = 1 ; // chunksize (number of bytes in a single chunk, needed for migration option)
2006-10-30 03:39:39 +01:00
private static final int idx_col_chunkcount = 2 ; // chunkcount (number of chunks in this collection)
2006-10-06 01:47:08 +02:00
private static final int idx_col_clusteridx = 3 ; // selector for right cluster file, must be >= arrayIndex(chunkcount)
private static final int idx_col_flags = 4 ; // flags (for future use)
2006-10-30 03:39:39 +01:00
private static final int idx_col_indexpos = 5 ; // indexpos (position in array file)
2006-10-06 01:47:08 +02:00
private static final int idx_col_lastread = 6 ; // a time stamp, update time in days since 1.1.2000
private static final int idx_col_lastwrote = 7 ; // a time stamp, update time in days since 1.1.2000
2006-08-05 01:04:03 +02:00
2006-12-06 04:02:57 +01:00
private static kelondroRow indexRow ( int keylength , kelondroOrder payloadOrder ) {
2006-08-05 01:04:03 +02:00
return new kelondroRow (
2006-10-06 01:47:08 +02:00
" byte[] key- " + keylength + " , " +
2006-08-05 01:04:03 +02:00
" int chunksize-4 {b256}, " +
" int chunkcount-4 {b256}, " +
2006-10-06 01:47:08 +02:00
" byte clusteridx-1 {b256}, " +
" byte flags-1 {b256}, " +
2006-08-05 01:04:03 +02:00
" int indexpos-4 {b256}, " +
2006-08-06 00:22:14 +02:00
" short lastread-2 {b256}, " +
2006-12-06 04:02:57 +01:00
" short lastwrote-2 {b256} " ,
payloadOrder , 0
2006-08-05 01:04:03 +02:00
) ;
}
2006-03-01 01:25:02 +01:00
2006-11-05 03:10:40 +01:00
public kelondroRow payloadRow() {
    // expose the row definition of the chunks stored inside the collections
    return payloadrow;
}
2006-08-11 05:20:44 +02:00
private static String fillZ(String s, int len) {
    // left-pad s with '0' characters until it is at least len characters long;
    // used to build fixed-width hex fields in file names.
    // fix: the pad literal must be "0", not " 0 " (which would insert spaces
    // and grow the string by three characters per round)
    while (s.length() < len) s = "0" + s;
    return s;
}
2006-08-05 21:18:33 +02:00
private static File arrayFile ( File path , String filenameStub , int loadfactor , int chunksize , int partitionNumber , int serialNumber ) {
2006-08-11 05:20:44 +02:00
String lf = fillZ ( Integer . toHexString ( loadfactor ) . toUpperCase ( ) , 2 ) ;
String cs = fillZ ( Integer . toHexString ( chunksize ) . toUpperCase ( ) , 4 ) ;
String pn = fillZ ( Integer . toHexString ( partitionNumber ) . toUpperCase ( ) , 2 ) ;
String sn = fillZ ( Integer . toHexString ( serialNumber ) . toUpperCase ( ) , 2 ) ;
2006-08-05 21:18:33 +02:00
return new File ( path , filenameStub + " . " + lf + " . " + cs + " . " + pn + " . " + sn + " .kca " ) ; // kelondro collection array
2006-03-01 01:25:02 +01:00
}
2006-10-06 01:47:08 +02:00
2006-08-11 05:20:44 +02:00
private static File propertyFile ( File path , String filenameStub , int loadfactor , int chunksize ) {
String lf = fillZ ( Integer . toHexString ( loadfactor ) . toUpperCase ( ) , 2 ) ;
String cs = fillZ ( Integer . toHexString ( chunksize ) . toUpperCase ( ) , 4 ) ;
2006-10-06 01:47:08 +02:00
return new File ( path , filenameStub + " . " + lf + " . " + cs + " .properties " ) ;
2006-08-11 05:20:44 +02:00
}
2006-07-04 01:57:33 +02:00
public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder,
                               long preloadTime, int loadfactor, int maxpartitions, kelondroRow rowdef) throws IOException {
    // opens (or creates) a collection index in the given path.
    // the preloadTime is the number of milliseconds that may be spent pre-loading the index;
    // it is only used if the kelondroFlexTable is backed up with a kelondroTree.
    this.path = path;
    this.filenameStub = filenameStub;
    this.keylength = keyLength;
    this.payloadrow = rowdef;
    this.loadfactor = loadfactor;
    this.maxPartitions = maxpartitions;
    this.commonsPath = new File(path, filenameStub + "." + fillZ(Integer.toHexString(rowdef.objectsize).toUpperCase(), 4) + ".commons");
    this.commonsPath.mkdirs();

    // if no index file exists yet, the index must be regenerated from the array files
    boolean ramIndexGeneration = false;
    boolean fileIndexGeneration = !(new File(path, filenameStub + ".index").exists());
    if (ramIndexGeneration) index = new kelondroRowSet(indexRow(keyLength, indexOrder), 0);
    if (fileIndexGeneration) index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keyLength, indexOrder), true);

    // open array files; all entries will be dynamically created with getArray()
    this.arrays = new HashMap();
    if ((fileIndexGeneration) || (ramIndexGeneration)) {
        serverLog.logFine("STARTUP", "STARTED INITIALIZATION OF NEW COLLECTION INDEX. THIS WILL TAKE SOME TIME");
        openAllArrayFiles(((fileIndexGeneration) || (ramIndexGeneration)), indexOrder);
    }

    // open/create index table
    if (index == null) index = openIndexFile(path, filenameStub, indexOrder, preloadTime, loadfactor, rowdef);
}
private void openAllArrayFiles ( boolean indexGeneration , kelondroOrder indexOrder ) throws IOException {
String [ ] list = this . path . list ( ) ;
kelondroFixedWidthArray array ;
2006-12-06 04:02:57 +01:00
kelondroRow irow = indexRow ( keylength , indexOrder ) ;
2006-10-06 01:47:08 +02:00
int t = kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ;
for ( int i = 0 ; i < list . length ; i + + ) if ( list [ i ] . endsWith ( " .kca " ) ) {
// open array
int pos = list [ i ] . indexOf ( '.' ) ;
if ( pos < 0 ) continue ;
int chunksize = Integer . parseInt ( list [ i ] . substring ( pos + 4 , pos + 8 ) , 16 ) ;
int partitionNumber = Integer . parseInt ( list [ i ] . substring ( pos + 9 , pos + 11 ) , 16 ) ;
int serialNumber = Integer . parseInt ( list [ i ] . substring ( pos + 12 , pos + 14 ) , 16 ) ;
try {
array = openArrayFile ( partitionNumber , serialNumber , true ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
continue ;
}
// remember that we opened the array
arrays . put ( partitionNumber + " - " + chunksize , array ) ;
if ( ( index ! = null ) & & ( indexGeneration ) ) {
// loop over all elements in array and create index entry for each row
2006-10-30 03:39:39 +01:00
kelondroRow . EntryIndex aentry ;
kelondroRow . Entry ientry ;
Iterator ei = array . contentRows ( - 1 ) ;
2006-10-06 01:47:08 +02:00
byte [ ] key ;
long start = System . currentTimeMillis ( ) ;
long lastlog = start ;
2006-10-30 03:39:39 +01:00
int count = 0 ;
while ( ei . hasNext ( ) ) {
aentry = ( kelondroRow . EntryIndex ) ei . next ( ) ;
2006-10-06 01:47:08 +02:00
key = aentry . getColBytes ( 0 ) ;
2006-10-30 03:39:39 +01:00
assert ( key ! = null ) ;
2006-10-06 01:47:08 +02:00
if ( key = = null ) continue ; // skip deleted entries
ientry = irow . newEntry ( ) ;
ientry . setCol ( idx_col_key , key ) ;
ientry . setCol ( idx_col_chunksize , chunksize ) ;
2007-04-03 14:10:12 +02:00
ientry . setCol ( idx_col_chunkcount , kelondroRowCollection . sizeOfExportedCollectionRows ( aentry , 1 ) ) ;
2006-10-06 01:47:08 +02:00
ientry . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
ientry . setCol ( idx_col_flags , ( byte ) 0 ) ;
2006-10-30 03:39:39 +01:00
ientry . setCol ( idx_col_indexpos , aentry . index ( ) ) ;
2006-10-06 01:47:08 +02:00
ientry . setCol ( idx_col_lastread , t ) ;
ientry . setCol ( idx_col_lastwrote , t ) ;
2006-10-26 15:50:50 +02:00
index . addUnique ( ientry ) ;
2006-10-30 03:39:39 +01:00
count + + ;
2006-10-06 01:47:08 +02:00
// write a log
if ( System . currentTimeMillis ( ) - lastlog > 30000 ) {
2007-02-20 09:35:51 +01:00
serverLog . logFine ( " STARTUP " , " created " + count + " RWI index entries. " + ( ( ( System . currentTimeMillis ( ) - start ) * ( array . size ( ) + array . free ( ) - count ) / count ) / 60000 ) + " minutes remaining for this array " ) ;
2006-10-06 01:47:08 +02:00
lastlog = System . currentTimeMillis ( ) ;
}
}
}
}
}
private kelondroIndex openIndexFile ( File path , String filenameStub , kelondroOrder indexOrder ,
2007-03-06 23:43:32 +01:00
long preloadTime , int loadfactor , kelondroRow rowdef ) throws IOException {
2006-10-06 01:47:08 +02:00
// open/create index table
2007-04-05 12:14:48 +02:00
kelondroIndex theindex = new kelondroCache ( new kelondroFlexTable ( path , filenameStub + " .index " , preloadTime , indexRow ( keylength , indexOrder ) , true ) , true , false ) ;
2006-08-05 01:04:03 +02:00
2006-08-11 05:20:44 +02:00
// save/check property file for this array
File propfile = propertyFile ( path , filenameStub , loadfactor , rowdef . objectsize ( ) ) ;
Map props = new HashMap ( ) ;
if ( propfile . exists ( ) ) {
props = serverFileUtils . loadHashMap ( propfile ) ;
String stored_rowdef = ( String ) props . get ( " rowdef " ) ;
2007-04-03 14:10:12 +02:00
if ( ( stored_rowdef = = null ) | | ( ! ( rowdef . subsumes ( new kelondroRow ( stored_rowdef , rowdef . objectOrder , 0 ) ) ) ) ) {
2006-08-11 05:20:44 +02:00
System . out . println ( " FATAL ERROR: stored rowdef ' " + stored_rowdef + " ' does not match with new rowdef ' " +
rowdef + " ' for array cluster ' " + path + " / " + filenameStub + " ' " ) ;
System . exit ( - 1 ) ;
}
}
props . put ( " rowdef " , rowdef . toString ( ) ) ;
serverFileUtils . saveMap ( propfile , props , " CollectionIndex properties " ) ;
2006-10-06 01:47:08 +02:00
return theindex ;
2006-03-01 01:25:02 +01:00
}
2006-08-05 21:18:33 +02:00
private kelondroFixedWidthArray openArrayFile ( int partitionNumber , int serialNumber , boolean create ) throws IOException {
2006-11-05 03:10:40 +01:00
File f = arrayFile ( path , filenameStub , loadfactor , payloadrow . objectsize ( ) , partitionNumber , serialNumber ) ;
2006-08-12 17:59:14 +02:00
int load = arrayCapacity ( partitionNumber ) ;
kelondroRow rowdef = new kelondroRow (
2006-10-06 01:47:08 +02:00
" byte[] key- " + keylength + " , " +
2006-12-06 04:02:57 +01:00
" byte[] collection- " + ( kelondroRowCollection . exportOverheadSize + load * this . payloadrow . objectsize ( ) ) ,
index . row ( ) . objectOrder ,
0
2006-08-12 17:59:14 +02:00
) ;
2006-08-24 04:19:25 +02:00
if ( ( ! ( f . exists ( ) ) ) & & ( ! create ) ) return null ;
2006-10-06 01:47:08 +02:00
kelondroFixedWidthArray a = new kelondroFixedWidthArray ( f , rowdef , 0 ) ;
serverLog . logFine ( " STARTUP " , " opened array file " + f + " with " + a . size ( ) + " RWIs " ) ;
return a ;
2006-03-01 01:25:02 +01:00
}
2006-08-05 21:18:33 +02:00
private kelondroFixedWidthArray getArray ( int partitionNumber , int serialNumber , int chunksize ) {
2006-08-05 01:04:03 +02:00
String accessKey = partitionNumber + " - " + chunksize ;
kelondroFixedWidthArray array = ( kelondroFixedWidthArray ) arrays . get ( accessKey ) ;
if ( array ! = null ) return array ;
try {
2006-08-05 21:18:33 +02:00
array = openArrayFile ( partitionNumber , serialNumber , true ) ;
2006-08-05 01:04:03 +02:00
} catch ( IOException e ) {
return null ;
}
arrays . put ( accessKey , array ) ;
return array ;
}
2007-02-27 14:01:22 +01:00
private void arrayResolveRemoved() throws IOException {
    // flush the to-be-removed markers in every currently opened array file
    for (Iterator it = arrays.values().iterator(); it.hasNext(); ) {
        ((kelondroFixedWidthArray) it.next()).resolveMarkedRemoved();
    }
}
2006-08-05 01:04:03 +02:00
private int arrayCapacity ( int arrayCounter ) {
2007-03-04 23:54:04 +01:00
if ( arrayCounter < 0 ) return 0 ;
2006-08-05 01:04:03 +02:00
int load = this . loadfactor ;
for ( int i = 0 ; i < arrayCounter ; i + + ) load = load * this . loadfactor ;
return load ;
}
2006-03-01 01:25:02 +01:00
private int arrayIndex ( int requestedCapacity ) throws kelondroOutOfLimitsException {
// the requestedCapacity is the number of wanted chunks
2006-08-05 01:04:03 +02:00
int load = 1 , i = 0 ;
while ( true ) {
load = load * this . loadfactor ;
if ( load > = requestedCapacity ) return i ;
i + + ;
2006-03-01 01:25:02 +01:00
}
}
2007-03-18 20:45:23 +01:00
public int size ( ) throws IOException {
2006-07-03 17:14:54 +02:00
return index . size ( ) ;
}
2006-12-22 13:54:56 +01:00
public int minMem ( ) {
// calculate a minimum amount of memory that is necessary to use the collection
// during runtime (after the index was initialized)
// caclculate an upper limit (not the correct size) of the maximum number of indexes for a wordHash
// this is computed by the size of the biggest used collection
// this must be multiplied with the payload size
// and doubled for necessary memory transformation during sort operation
2007-03-07 10:38:14 +01:00
return ( int ) ( 2 * arrayCapacity ( arrays . size ( ) - 1 ) * this . payloadrow . objectsize * kelondroRowSet . growfactor ) ;
2006-12-22 13:54:56 +01:00
}
2006-08-05 01:04:03 +02:00
2007-02-27 16:54:02 +01:00
private void array_remove(
        int oldPartitionNumber, int serialNumber, int chunkSize,
        int oldRownumber) throws IOException {
    // we need a new slot, that means we must first delete the old entry:
    // locate the array file for the old partition and mark the row removed
    kelondroFixedWidthArray oldArray = getArray(oldPartitionNumber, serialNumber, chunkSize);
    oldArray.remove(oldRownumber, true);
}
private kelondroRow . Entry array_new (
byte [ ] key , kelondroRowCollection collection ) throws IOException {
2007-02-25 22:06:26 +01:00
// the collection is new
2007-02-27 16:54:02 +01:00
int partitionNumber = arrayIndex ( collection . size ( ) ) ;
2007-02-26 16:49:23 +01:00
kelondroRow . Entry indexrow = index . row ( ) . newEntry ( ) ;
2007-02-27 16:54:02 +01:00
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , this . payloadrow . objectsize ( ) ) ;
2007-02-25 22:06:26 +01:00
// define row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// write a new entry in this array
int newRowNumber = array . add ( arrayEntry ) ;
// store the new row number in the index
indexrow . setCol ( idx_col_key , key ) ;
indexrow . setCol ( idx_col_chunksize , this . payloadrow . objectsize ( ) ) ;
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
2007-02-27 16:54:02 +01:00
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
2007-02-25 22:06:26 +01:00
indexrow . setCol ( idx_col_flags , ( byte ) 0 ) ;
indexrow . setCol ( idx_col_indexpos , ( long ) newRowNumber ) ;
indexrow . setCol ( idx_col_lastread , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
2007-02-27 14:01:22 +01:00
// after calling this method there must be an index.addUnique(indexrow);
2007-02-26 16:49:23 +01:00
return indexrow ;
2007-02-25 22:06:26 +01:00
}
2007-02-27 14:01:22 +01:00
private void array_add (
2007-02-25 22:06:26 +01:00
byte [ ] key , kelondroRowCollection collection , kelondroRow . Entry indexrow ,
2007-02-27 14:01:22 +01:00
int partitionNumber , int serialNumber , int chunkSize ) throws IOException {
2007-02-25 22:06:26 +01:00
2007-02-27 14:01:22 +01:00
// write a new entry in the other array
2007-02-25 22:06:26 +01:00
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
2007-02-27 14:01:22 +01:00
2007-02-26 16:49:23 +01:00
// define new row
2007-02-25 22:06:26 +01:00
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
2007-02-27 14:01:22 +01:00
// write a new entry in this array
int rowNumber = array . add ( arrayEntry ) ;
// store the new row number in the index
2007-02-25 22:06:26 +01:00
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
2007-02-27 14:01:22 +01:00
indexrow . setCol ( idx_col_indexpos , ( long ) rowNumber ) ;
2007-02-25 22:06:26 +01:00
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
2007-02-27 14:01:22 +01:00
// after calling this method there must be a index.put(indexrow);
2007-02-25 22:06:26 +01:00
}
2007-02-28 12:13:23 +01:00
private ArrayList array_add_multiple(TreeMap array_add_map, int serialNumber, int chunkSize) throws IOException {
    // bulk version of array_add: array_add_map groups {key, collection, indexrow}
    // triples by partition number, so every array file is visited only once
    // (optimizes the R/W head path). Returns the list of modified index rows;
    // after calling this method there must be a index.put(indexrow) for each.
    ArrayList indexrows = new ArrayList();
    Iterator partitionIter = array_add_map.entrySet().iterator();
    while (partitionIter.hasNext()) {
        Map.Entry partitionEntry = (Map.Entry) partitionIter.next();
        ArrayList actions = (ArrayList) partitionEntry.getValue();
        int partitionNumber = ((Integer) partitionEntry.getKey()).intValue();
        kelondroFixedWidthArray array = getArray(partitionNumber, serialNumber, chunkSize);
        Iterator actionIter = actions.iterator();
        while (actionIter.hasNext()) {
            Object[] triple = (Object[]) actionIter.next();
            byte[] key = (byte[]) triple[0];
            kelondroRowCollection collection = (kelondroRowCollection) triple[1];
            kelondroRow.Entry indexrow = (kelondroRow.Entry) triple[2];
            // build the new array row and append it
            kelondroRow.Entry arrayEntry = array.row().newEntry();
            arrayEntry.setCol(0, key);
            arrayEntry.setCol(1, collection.exportCollection());
            int rowNumber = array.add(arrayEntry);
            // record the new location in the index row
            indexrow.setCol(idx_col_chunkcount, collection.size());
            indexrow.setCol(idx_col_clusteridx, (byte) partitionNumber);
            indexrow.setCol(idx_col_indexpos, (long) rowNumber);
            indexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
            indexrows.add(indexrow);
        }
    }
    return indexrows;
}
2007-02-27 14:01:22 +01:00
private void array_replace (
2007-02-25 22:06:26 +01:00
byte [ ] key , kelondroRowCollection collection , kelondroRow . Entry indexrow ,
2007-02-27 14:01:22 +01:00
int partitionNumber , int serialNumber , int chunkSize ,
int rowNumber ) throws IOException {
// we don't need a new slot, just write collection into the old one
2007-02-25 22:06:26 +01:00
2007-02-27 14:01:22 +01:00
// find array file
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
2007-02-25 22:06:26 +01:00
2007-02-27 14:01:22 +01:00
// define new row
2007-02-25 22:06:26 +01:00
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
2007-02-27 14:01:22 +01:00
// overwrite entry in this array
array . set ( rowNumber , arrayEntry ) ;
// update the index entry
2007-04-03 14:10:12 +02:00
final int collectionsize = collection . size ( ) ; // extra variable for easier debugging
indexrow . setCol ( idx_col_chunkcount , collectionsize ) ;
2007-02-27 14:01:22 +01:00
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
2007-02-25 22:06:26 +01:00
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
2007-02-27 14:01:22 +01:00
2007-04-03 14:10:12 +02:00
// after calling this method there must be a index.put(indexrow);
2007-02-25 22:06:26 +01:00
}
2007-02-28 12:13:23 +01:00
private ArrayList array_replace_multiple(TreeMap array_replace_map, int serialNumber, int chunkSize) throws IOException {
    // bulk version of array_replace: array_replace_map groups, per partition
    // number, a TreeMap from row number to {key, collection, indexrow} triples,
    // so every array file is visited only once and rows are written in
    // ascending order (optimizes the R/W head path). Returns the list of
    // modified index rows; after calling this method there must be a
    // index.put(indexrow) for each.
    ArrayList indexrows = new ArrayList();
    Iterator partitionIter = array_replace_map.entrySet().iterator();
    while (partitionIter.hasNext()) {
        Map.Entry partitionEntry = (Map.Entry) partitionIter.next();
        TreeMap rowActions = (TreeMap) partitionEntry.getValue();
        int partitionNumber = ((Integer) partitionEntry.getKey()).intValue();
        kelondroFixedWidthArray array = getArray(partitionNumber, serialNumber, chunkSize);
        Iterator rowIter = rowActions.entrySet().iterator();
        while (rowIter.hasNext()) {
            Map.Entry rowEntry = (Map.Entry) rowIter.next();
            int rowNumber = ((Integer) rowEntry.getKey()).intValue();
            Object[] triple = (Object[]) rowEntry.getValue();
            byte[] key = (byte[]) triple[0];
            kelondroRowCollection collection = (kelondroRowCollection) triple[1];
            kelondroRow.Entry indexrow = (kelondroRow.Entry) triple[2];
            // build the replacement row and write it over the old slot
            kelondroRow.Entry arrayEntry = array.row().newEntry();
            arrayEntry.setCol(0, key);
            arrayEntry.setCol(1, collection.exportCollection());
            array.set(rowNumber, arrayEntry);
            // update the index entry
            indexrow.setCol(idx_col_chunkcount, collection.size());
            indexrow.setCol(idx_col_clusteridx, (byte) partitionNumber);
            indexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
            indexrows.add(indexrow);
        }
    }
    return indexrows;
}
2007-02-26 16:49:23 +01:00
public synchronized void put ( byte [ ] key , kelondroRowCollection collection ) throws IOException , kelondroOutOfLimitsException {
2007-02-25 22:06:26 +01:00
// first find an old entry, if one exists
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) {
2007-02-26 16:49:23 +01:00
// create new row and index entry
2007-02-25 22:06:26 +01:00
if ( ( collection ! = null ) & & ( collection . size ( ) > 0 ) ) {
2007-02-27 14:01:22 +01:00
indexrow = array_new ( key , collection ) ; // modifies indexrow
2007-02-25 22:06:26 +01:00
index . addUnique ( indexrow ) ;
}
return ;
}
// overwrite the old collection
// read old information
2007-02-26 16:49:23 +01:00
int oldchunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ; // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
2007-02-25 22:06:26 +01:00
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) ;
2007-02-26 16:49:23 +01:00
if ( ( collection = = null ) | | ( collection . size ( ) = = 0 ) ) {
2007-02-25 22:06:26 +01:00
// delete the index entry and the array
2007-02-27 14:01:22 +01:00
kelondroFixedWidthArray array = getArray ( oldPartitionNumber , serialNumber , oldchunksize ) ;
array . remove ( oldrownumber , false ) ;
2007-02-25 22:06:26 +01:00
index . remove ( key ) ;
return ;
}
int newPartitionNumber = arrayIndex ( collection . size ( ) ) ;
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
2007-02-27 14:01:22 +01:00
array_replace (
2007-02-25 22:06:26 +01:00
key , collection , indexrow ,
2007-02-27 14:01:22 +01:00
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
2007-02-25 22:06:26 +01:00
} else {
2007-02-27 14:01:22 +01:00
array_remove (
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
array_add (
2007-02-25 22:06:26 +01:00
key , collection , indexrow ,
2007-02-27 14:01:22 +01:00
newPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
2007-02-25 22:06:26 +01:00
}
2007-02-27 14:01:22 +01:00
arrayResolveRemoved ( ) ; // remove all to-be-removed marked entries
2007-04-03 14:10:12 +02:00
if ( ( int ) indexrow . getColLong ( idx_col_chunkcount ) ! = collection . size ( ) )
serverLog . logSevere ( " kelondroCollectionIndex " , " UPDATE (put) ERROR: array has different chunkcount than index after merge: index = " + ( int ) indexrow . getColLong ( idx_col_chunkcount ) + " , collection.size() = " + collection . size ( ) ) ;
2007-02-25 22:06:26 +01:00
index . put ( indexrow ) ; // write modified indexrow
}
2007-02-26 16:49:23 +01:00
public synchronized void mergeMultiple ( List /* of indexContainer */ containerList ) throws IOException , kelondroOutOfLimitsException {
// merge a bulk of index containers
// this method should be used to optimize the R/W head path length
2007-02-25 22:06:26 +01:00
2007-02-26 16:49:23 +01:00
// separate the list in two halves:
// - containers that do not exist yet in the collection
// - containers that do exist in the collection and must be merged
Iterator i = containerList . iterator ( ) ;
indexContainer container ;
byte [ ] key ;
ArrayList newContainer = new ArrayList ( ) ;
2007-02-28 12:13:23 +01:00
TreeMap existingContainer = new TreeMap ( ) ; // a mapping from Integer (partition) to a TreeMap (mapping from index to object triple)
TreeMap containerMap ; // temporary map; mapping from index position to object triple with {key, container, indexrow}
2007-02-26 16:49:23 +01:00
kelondroRow . Entry indexrow ;
2007-02-28 12:13:23 +01:00
int oldrownumber1 ; // index of the entry in array
int oldPartitionNumber1 ; // points to array file
2007-02-26 16:49:23 +01:00
while ( i . hasNext ( ) ) {
container = ( indexContainer ) i . next ( ) ;
if ( ( container = = null ) | | ( container . size ( ) = = 0 ) ) continue ;
key = container . getWordHash ( ) . getBytes ( ) ;
// first find an old entry, if one exists
indexrow = index . get ( key ) ;
if ( indexrow = = null ) {
newContainer . add ( new Object [ ] { key , container } ) ;
} else {
2007-02-28 12:13:23 +01:00
oldrownumber1 = ( int ) indexrow . getColLong ( idx_col_indexpos ) ;
oldPartitionNumber1 = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ;
containerMap = ( TreeMap ) existingContainer . get ( new Integer ( oldPartitionNumber1 ) ) ;
if ( containerMap = = null ) containerMap = new TreeMap ( ) ;
containerMap . put ( new Integer ( oldrownumber1 ) , new Object [ ] { key , container , indexrow } ) ;
existingContainer . put ( new Integer ( oldPartitionNumber1 ) , containerMap ) ;
2007-02-26 16:49:23 +01:00
}
}
// now iterate through the container lists and execute merges
// this is done in such a way, that there is a optimized path for the R/W head
// merge existing containers
2007-02-28 12:13:23 +01:00
Map . Entry tripleEntry ;
2007-02-27 14:01:22 +01:00
Object [ ] record ;
2007-02-27 16:54:02 +01:00
ArrayList indexrows_existing = new ArrayList ( ) ;
2007-02-27 14:01:22 +01:00
kelondroRowCollection collection ;
2007-02-28 12:13:23 +01:00
TreeMap array_replace_map = new TreeMap ( ) ;
TreeMap array_add_map = new TreeMap ( ) ;
ArrayList actionList ;
TreeMap actionMap ;
2007-03-04 23:54:04 +01:00
boolean madegc = false ;
2007-04-03 14:10:12 +02:00
System . out . println ( " DEBUG existingContainer: " + existingContainer . toString ( ) ) ;
2007-02-28 12:13:23 +01:00
while ( existingContainer . size ( ) > 0 ) {
oldPartitionNumber1 = ( ( Integer ) existingContainer . lastKey ( ) ) . intValue ( ) ;
containerMap = ( TreeMap ) existingContainer . remove ( new Integer ( oldPartitionNumber1 ) ) ;
Iterator j = containerMap . entrySet ( ) . iterator ( ) ;
while ( j . hasNext ( ) ) {
tripleEntry = ( Map . Entry ) j . next ( ) ;
oldrownumber1 = ( ( Integer ) tripleEntry . getKey ( ) ) . intValue ( ) ;
record = ( Object [ ] ) tripleEntry . getValue ( ) ; // {byte[], indexContainer, kelondroRow.Entry}
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// merge with the old collection
key = ( byte [ ] ) record [ 0 ] ;
collection = ( kelondroRowCollection ) record [ 1 ] ;
indexrow = ( kelondroRow . Entry ) record [ 2 ] ;
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// read old information
int oldchunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ; // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
2007-04-03 14:10:12 +02:00
assert oldPartitionNumber1 = = oldPartitionNumber : " oldPartitionNumber1 = " + oldPartitionNumber1 + " , oldPartitionNumber = " + oldPartitionNumber + " , containerMap = " + containerMap + " , existingContainer: " + existingContainer . toString ( ) ;
assert oldrownumber1 = = oldrownumber : " oldrownumber1 = " + oldrownumber1 + " , oldrownumber = " + oldrownumber + " , containerMap = " + containerMap + " , existingContainer: " + existingContainer . toString ( ) ;
2007-02-28 12:13:23 +01:00
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) ;
int oldSerialNumber = 0 ;
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// load the old collection and join it
kelondroRowSet oldcollection = getwithparams ( indexrow , oldchunksize , oldchunkcount , oldPartitionNumber , oldrownumber , oldSerialNumber , false ) ;
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// join with new collection
oldcollection . addAllUnique ( collection ) ;
2007-03-14 09:55:05 +01:00
oldcollection . sort ( ) ;
2007-02-28 12:13:23 +01:00
oldcollection . uniq ( ) ; // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
2007-03-08 17:15:40 +01:00
oldcollection . trim ( false ) ;
2007-03-03 01:55:51 +01:00
// check for size of collection:
// if necessary shrink the collection and dump a part of that collection
// to avoid that this grows too big
int newPartitionNumber ;
while ( ( newPartitionNumber = arrayIndex ( oldcollection . size ( ) ) ) > maxPartitions ) {
kelondroRowSet newcollection = shrinkCollection ( key , oldcollection , arrayCapacity ( maxPartitions ) ) ;
saveCommons ( key , oldcollection ) ;
oldcollection = newcollection ;
}
// work on with oldcollection
2007-02-28 12:13:23 +01:00
collection = oldcollection ;
2007-03-03 01:55:51 +01:00
newPartitionNumber = arrayIndex ( collection . size ( ) ) ;
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
actionMap = ( TreeMap ) array_replace_map . get ( new Integer ( oldPartitionNumber ) ) ;
if ( actionMap = = null ) actionMap = new TreeMap ( ) ;
actionMap . put ( new Integer ( oldrownumber ) , new Object [ ] { key , collection , indexrow } ) ;
array_replace_map . put ( new Integer ( oldPartitionNumber ) , actionMap ) ;
/ *
array_replace (
key , collection , indexrow ,
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
indexrows_existing . add ( indexrow ) ; // indexrows are collected and written later as block
* /
} else {
array_remove (
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
actionList = ( ArrayList ) array_add_map . get ( new Integer ( newPartitionNumber ) ) ;
if ( actionList = = null ) actionList = new ArrayList ( ) ;
actionList . add ( new Object [ ] { key , collection , indexrow } ) ;
array_add_map . put ( new Integer ( newPartitionNumber ) , actionList ) ;
/ *
array_add (
key , collection , indexrow ,
newPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
indexrows_existing . add ( indexrow ) ; // indexrows are collected and written later as block
* /
}
// memory protection: flush collected collections
2007-03-04 23:54:04 +01:00
if ( serverMemory . available ( ) < minMem ( ) ) {
// emergency flush
indexrows_existing . addAll ( array_replace_multiple ( array_replace_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_replace_map = new TreeMap ( ) ; // delete references
indexrows_existing . addAll ( array_add_multiple ( array_add_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_add_map = new TreeMap ( ) ; // delete references
if ( ! madegc ) {
// prevent that this flush is made again even when there is enough memory
System . gc ( ) ;
// prevent that this gc happens more than one time
madegc = true ;
}
}
2007-02-27 14:01:22 +01:00
}
2007-02-26 16:49:23 +01:00
}
2007-02-27 14:01:22 +01:00
2007-02-28 12:13:23 +01:00
// finallly flush the collected collections
indexrows_existing . addAll ( array_replace_multiple ( array_replace_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_replace_map = new TreeMap ( ) ; // delete references
indexrows_existing . addAll ( array_add_multiple ( array_add_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_add_map = new TreeMap ( ) ; // delete references
2007-02-27 14:01:22 +01:00
// write new containers
i = newContainer . iterator ( ) ;
2007-02-27 16:54:02 +01:00
ArrayList indexrows_new = new ArrayList ( ) ;
2007-02-27 14:01:22 +01:00
while ( i . hasNext ( ) ) {
record = ( Object [ ] ) i . next ( ) ; // {byte[], indexContainer}
key = ( byte [ ] ) record [ 0 ] ;
collection = ( indexContainer ) record [ 1 ] ;
indexrow = array_new ( key , collection ) ; // modifies indexrow
2007-02-27 16:54:02 +01:00
indexrows_new . add ( indexrow ) ; // collect new index rows
2007-02-27 14:01:22 +01:00
}
2007-02-28 12:13:23 +01:00
// remove all to-be-removed marked entries
arrayResolveRemoved ( ) ;
2007-02-27 14:01:22 +01:00
// write index entries
2007-02-27 16:54:02 +01:00
index . putMultiple ( indexrows_existing , new Date ( ) ) ; // write modified indexrows in optimized manner
index . addUniqueMultiple ( indexrows_new , new Date ( ) ) ; // write new indexrows in optimized manner
2007-02-26 16:49:23 +01:00
}
public synchronized void merge(indexContainer container) throws IOException, kelondroOutOfLimitsException {
    // Merges all rows of the given container into the collection stored under the
    // container's word hash; creates a new collection if none exists yet.
    // Throws kelondroOutOfLimitsException if a partition file would become oversized.
    if ((container == null) || (container.size() == 0)) return;
    byte[] key = container.getWordHash().getBytes();

    // first find an old entry, if one exists
    kelondroRow.Entry indexrow = index.get(key);
    if (indexrow == null) {
        // no collection stored yet: create a fresh array entry and a new index row
        indexrow = array_new(key, container); // modifies indexrow
        index.addUnique(indexrow); // write modified indexrow
    } else {
        // merge with the old collection
        // attention! this modifies the indexrow entry which must be written with index.put(indexrow) afterwards!
        kelondroRowCollection collection = (kelondroRowCollection) container;

        // read old information (array coordinates of the stored collection)
        int oldchunksize = (int) indexrow.getColLong(idx_col_chunksize); // needed only for migration
        int oldchunkcount = (int) indexrow.getColLong(idx_col_chunkcount); // the number of rows in the collection
        int oldrownumber = (int) indexrow.getColLong(idx_col_indexpos); // index of the entry in array
        int oldPartitionNumber = (int) indexrow.getColByte(idx_col_clusteridx); // points to array file

        assert (oldPartitionNumber >= arrayIndex(oldchunkcount)) : "oldPartitionNumber = " + oldPartitionNumber + ", arrayIndex(oldchunkcount) = " + arrayIndex(oldchunkcount);

        int oldSerialNumber = 0;
        // load the old collection and join it
        kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, oldSerialNumber, false);

        // join with new collection
        oldcollection.addAllUnique(collection);
        oldcollection.sort();
        oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
        oldcollection.trim(false);
        collection = oldcollection;

        // check for size of collection:
        // if necessary shrink the collection and dump a part of that collection
        // to avoid that this grows too big
        int newPartitionNumber;
        while ((newPartitionNumber = arrayIndex(oldcollection.size())) > maxPartitions) {
            kelondroRowSet newcollection = shrinkCollection(key, oldcollection, arrayCapacity(maxPartitions));
            saveCommons(key, oldcollection);
            oldcollection = newcollection;
        }
        // work on with oldcollection
        collection = oldcollection;
        newPartitionNumber = arrayIndex(collection.size());

        // see if we need new space or if we can overwrite the old space
        if (oldPartitionNumber == newPartitionNumber) {
            // collection still fits into its old partition: replace in place
            array_replace(
                    key, collection, indexrow,
                    oldPartitionNumber, oldSerialNumber, this.payloadrow.objectsize(),
                    oldrownumber); // modifies indexrow
        } else {
            // collection grew into another partition: remove from the old array file,
            // then add to the new one
            array_remove(
                    oldPartitionNumber, oldSerialNumber, this.payloadrow.objectsize(),
                    oldrownumber);
            array_add(
                    key, collection, indexrow,
                    newPartitionNumber, oldSerialNumber, this.payloadrow.objectsize()); // modifies indexrow
        }
        arrayResolveRemoved(); // remove all to-be-removed marked entries

        // consistency check: the index row must now reflect the merged collection size
        final int collectionsize = collection.size(); // extra variable for easier debugging
        final int indexrowcount = (int) indexrow.getColLong(idx_col_chunkcount);
        if (indexrowcount != collectionsize)
            serverLog.logSevere("kelondroCollectionIndex", "UPDATE (merge) ERROR: array has different chunkcount than index after merge: index = " + indexrowcount + ", collection.size() = " + collectionsize);

        index.put(indexrow); // write modified indexrow
    }
}
private kelondroRowSet shrinkCollection ( byte [ ] key , kelondroRowSet collection , int targetSize ) {
// removes entries from collection
// the removed entries are stored in a 'commons' dump file
// check if the collection is already small enough
int oldsize = collection . size ( ) ;
kelondroRowSet survival = new kelondroRowSet ( collection . rowdef , 0 ) ;
if ( oldsize < = targetSize ) return survival ;
// delete some entries, which are bad rated
Iterator i = collection . rows ( ) ;
kelondroRow . Entry entry ;
byte [ ] ref ;
while ( i . hasNext ( ) ) {
entry = ( kelondroRow . Entry ) i . next ( ) ;
ref = entry . getColBytes ( 0 ) ;
if ( ( ref . length = = 12 ) & & ( plasmaURL . probablyRootURL ( new String ( ref ) ) ) ) {
survival . addUnique ( entry ) ;
i . remove ( ) ;
}
}
int firstSurvival = survival . size ( ) ;
// check if we shrinked enough
Random rand = new Random ( System . currentTimeMillis ( ) ) ;
while ( survival . size ( ) > targetSize ) {
// now delete randomly more entries from the survival collection
i = survival . rows ( ) ;
while ( i . hasNext ( ) ) {
entry = ( kelondroRow . Entry ) i . next ( ) ;
ref = entry . getColBytes ( 0 ) ;
if ( rand . nextInt ( ) % 4 ! = 0 ) {
collection . addUnique ( entry ) ;
i . remove ( ) ;
}
}
}
serverLog . logInfo ( " kelondroCollectionIndex " , " shrinked common word " + new String ( key ) + " ; old size = " + oldsize + " , new size = " + collection . size ( ) + " , maximum size = " + targetSize + " , survival size = " + survival . size ( ) + " , first survival = " + firstSurvival ) ;
return survival ;
}
private void saveCommons ( byte [ ] key , kelondroRowSet collection ) {
if ( key . length ! = 12 ) return ;
2007-03-14 09:55:05 +01:00
collection . sort ( ) ;
2007-03-03 01:55:51 +01:00
TimeZone GMTTimeZone = TimeZone . getTimeZone ( " GMT " ) ;
Calendar gregorian = new GregorianCalendar ( GMTTimeZone ) ;
SimpleDateFormat formatter = new SimpleDateFormat ( " yyyyMMddHHmmss " ) ;
String filename = serverCodings . encodeHex ( kelondroBase64Order . enhancedCoder . decode ( new String ( key ) ) ) + " _ " + formatter . format ( gregorian . getTime ( ) ) + " .collection " ;
File storagePath = new File ( commonsPath , filename . substring ( 0 , 2 ) ) ; // make a subpath
storagePath . mkdirs ( ) ;
File file = new File ( storagePath , filename ) ;
try {
collection . saveCollection ( file ) ;
serverLog . logInfo ( " kelondroCollectionIndex " , " dumped common word " + new String ( key ) + " to " + file . toString ( ) + " ; size = " + collection . size ( ) ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
serverLog . logWarning ( " kelondroCollectionIndex " , " failed to dump common word " + new String ( key ) + " to " + file . toString ( ) + " ; size = " + collection . size ( ) ) ;
}
}
2007-02-26 16:49:23 +01:00
public synchronized int remove(byte[] key, Set removekeys) throws IOException, kelondroOutOfLimitsException {
    // Removes the given row keys (elements may be byte[] or String) from the
    // collection stored under 'key'. Returns the number of rows actually removed.
    // If the collection becomes empty, the whole collection and its index entry
    // are deleted; otherwise the shrunken collection may be moved to a smaller
    // partition file.
    if ((removekeys == null) || (removekeys.size() == 0)) return 0;

    // first find an old entry, if one exists
    kelondroRow.Entry indexrow = index.get(key);
    if (indexrow == null) return 0;

    // overwrite the old collection
    // read old information (array coordinates of the stored collection)
    int oldchunksize = (int) indexrow.getColLong(idx_col_chunksize); // needed only for migration
    int oldchunkcount = (int) indexrow.getColLong(idx_col_chunkcount); // the number of rows in the collection
    int oldrownumber = (int) indexrow.getColLong(idx_col_indexpos); // index of the entry in array
    int oldPartitionNumber = (int) indexrow.getColByte(idx_col_clusteridx); // points to array file

    assert (oldPartitionNumber >= arrayIndex(oldchunkcount));
    int removed = 0;
    assert (removekeys != null);

    // load the old collection and remove keys
    kelondroRowSet oldcollection = getwithparams(indexrow, oldchunksize, oldchunkcount, oldPartitionNumber, oldrownumber, serialNumber, false);

    // remove the keys from the set; String keys are converted to bytes first
    Iterator i = removekeys.iterator();
    Object k;
    while (i.hasNext()) {
        k = i.next();
        if ((k instanceof byte[]) && (oldcollection.remove((byte[]) k) != null)) removed++;
        if ((k instanceof String) && (oldcollection.remove(((String) k).getBytes()) != null)) removed++;
    }
    oldcollection.sort();
    oldcollection.trim(false);

    if (oldcollection.size() == 0) {
        // delete the index entry and the array
        kelondroFixedWidthArray array = getArray(oldPartitionNumber, serialNumber, oldchunksize);
        array.remove(oldrownumber, false);
        index.remove(key);
        return removed;
    }

    int newPartitionNumber = arrayIndex(oldcollection.size());

    // see if we need new space or if we can overwrite the old space
    if (oldPartitionNumber == newPartitionNumber) {
        // collection still fits into its old partition: replace in place
        array_replace(
                key, oldcollection, indexrow,
                oldPartitionNumber, serialNumber, this.payloadrow.objectsize(),
                oldrownumber); // modifies indexrow
    } else {
        // collection shrunk into a smaller partition: remove from the old array
        // file, then add to the new one
        array_remove(
                oldPartitionNumber, serialNumber, this.payloadrow.objectsize(),
                oldrownumber);
        array_add(
                key, oldcollection, indexrow,
                newPartitionNumber, serialNumber, this.payloadrow.objectsize()); // modifies indexrow
    }

    arrayResolveRemoved(); // remove all to-be-removed marked entries
    index.put(indexrow); // write modified indexrow
    return removed;
}
public synchronized int indexSize ( byte [ ] key ) throws IOException {
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) return 0 ;
return ( int ) indexrow . getColLong ( idx_col_chunkcount ) ;
2006-08-11 18:01:18 +02:00
}
2007-01-08 14:13:30 +01:00
public synchronized boolean has(byte[] key) throws IOException {
    // Returns true if a collection is stored under the given key.
    return index.has(key);
}
public synchronized kelondroRowSet get ( byte [ ] key ) throws IOException {
2006-03-01 01:25:02 +01:00
// find an entry, if one exists
2006-10-30 03:39:39 +01:00
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) return null ;
kelondroRowSet col = getdelete ( indexrow , false ) ;
assert ( col ! = null ) ;
return col ;
2006-08-05 21:18:33 +02:00
}
2006-10-30 03:39:39 +01:00
public synchronized kelondroRowSet delete ( byte [ ] key ) throws IOException {
2006-08-05 21:18:33 +02:00
// find an entry, if one exists
2006-10-30 03:39:39 +01:00
kelondroRow . Entry indexrow = index . remove ( key ) ;
if ( indexrow = = null ) return null ;
kelondroRowSet removedCollection = getdelete ( indexrow , true ) ;
assert ( removedCollection ! = null ) ;
return removedCollection ;
2006-08-05 21:18:33 +02:00
}
2006-10-13 03:19:26 +02:00
2006-10-30 03:39:39 +01:00
protected kelondroRowSet getdelete ( kelondroRow . Entry indexrow , boolean remove ) throws IOException {
2006-08-08 01:29:26 +02:00
// call this only within a synchronized(index) environment
2006-03-01 01:25:02 +01:00
// read values
2006-10-06 01:47:08 +02:00
int chunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ;
int chunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ;
int rownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ;
int partitionnumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ;
2007-04-03 14:10:12 +02:00
assert ( partitionnumber > = arrayIndex ( chunkcount ) ) : " partitionnumber = " + partitionnumber + " , arrayIndex(chunkcount) = " + arrayIndex ( chunkcount ) ;
2006-08-05 21:18:33 +02:00
int serialnumber = 0 ;
2006-08-05 01:04:03 +02:00
2006-10-30 03:39:39 +01:00
return getwithparams ( indexrow , chunksize , chunkcount , partitionnumber , rownumber , serialnumber , remove ) ;
2006-10-13 03:19:26 +02:00
}
2007-04-03 14:10:12 +02:00
private synchronized kelondroRowSet getwithparams(kelondroRow.Entry indexrow, int chunksize, int chunkcount, int clusteridx, int rownumber, int serialnumber, boolean remove) throws IOException {
    // Loads a collection from its array file, given the coordinates read from the
    // index row. Performs several consistency repairs on the fly:
    // - a malformed array key means the collection is lost; index entry and array
    //   row are discarded and an empty collection is returned
    // - an array key that disagrees with the index key causes a replacement index
    //   entry to be written pointing at this array row
    // - a wrong chunkcount in the index row is auto-fixed
    // If 'remove' is true, the collection's array row is deleted here; the index
    // entry must be removed by the caller.

    // open array entry
    kelondroFixedWidthArray array = getArray(clusteridx, serialnumber, chunksize);
    kelondroRow.Entry arrayrow = array.get(rownumber);
    if (arrayrow == null) throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, clusteridx, serialnumber).toString(), "array does not contain expected row");

    // read the row and define a collection
    byte[] indexkey = indexrow.getColBytes(idx_col_key);
    byte[] arraykey = arrayrow.getColBytes(0);
    if (!(index.row().objectOrder.wellformed(arraykey))) {
        // cleanup for a bad bug that corrupted the database
        index.remove(indexkey); // the RowCollection must be considered lost
        array.remove(rownumber, false); // loose the RowCollection (we don't know how much is lost)
        serverLog.logSevere("kelondroCollectionIndex." + array.filename, "lost a RowCollection because of a bad arraykey");
        return new kelondroRowSet(this.payloadrow, 0);
    }
    kelondroRowSet collection = new kelondroRowSet(this.payloadrow, arrayrow, 1); // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()

    if ((!(index.row().objectOrder.wellformed(indexkey))) || (index.row().objectOrder.compare(arraykey, indexkey) != 0)) {
        // check if we got the right row; this row is wrong. Fix it:
        index.remove(indexkey); // the wrong row cannot be fixed
        // store the row number in the index; this may be a double-entry, but better than nothing
        kelondroRow.Entry indexEntry = index.row().newEntry();
        indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0));
        indexEntry.setCol(idx_col_chunksize, this.payloadrow.objectsize());
        indexEntry.setCol(idx_col_chunkcount, collection.size());
        indexEntry.setCol(idx_col_clusteridx, (byte) clusteridx);
        indexEntry.setCol(idx_col_flags, (byte) 0);
        indexEntry.setCol(idx_col_indexpos, (long) rownumber);
        indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
        indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
        index.put(indexEntry);
        serverLog.logSevere("kelondroCollectionIndex." + array.filename, "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed");
    }

    int chunkcountInArray = collection.size();
    if (chunkcountInArray != chunkcount) {
        // fix the entry in index
        indexrow.setCol(idx_col_chunkcount, chunkcountInArray);
        index.put(indexrow);
        array.logFailure("INCONSISTENCY (get) in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, clusteridx, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed");
    }
    if (remove) array.remove(rownumber, false); // index is removed in calling method
    return collection;
}
public synchronized Iterator keycollections ( byte [ ] startKey , byte [ ] secondKey , boolean rot ) {
2006-08-05 21:18:33 +02:00
// returns an iteration of {byte[], kelondroRowSet} Objects
try {
2007-03-08 23:07:17 +01:00
return new keycollectionIterator ( startKey , secondKey , rot ) ;
2006-08-05 21:18:33 +02:00
} catch ( IOException e ) {
e . printStackTrace ( ) ;
return null ;
}
}
2006-03-01 01:25:02 +01:00
2006-08-05 21:18:33 +02:00
public class keycollectionIterator implements Iterator {
2006-08-05 01:04:03 +02:00
2006-08-05 21:18:33 +02:00
Iterator indexRowIterator ;
2006-08-05 01:04:03 +02:00
2007-03-08 23:07:17 +01:00
public keycollectionIterator ( byte [ ] startKey , byte [ ] secondKey , boolean rot ) throws IOException {
2006-08-05 21:18:33 +02:00
// iterator of {byte[], kelondroRowSet} Objects
2007-03-08 17:15:40 +01:00
kelondroCloneableIterator i = index . rows ( true , startKey ) ;
2007-03-08 23:07:17 +01:00
indexRowIterator = ( rot ) ? new kelondroRotateIterator ( i , secondKey ) : i ;
2006-08-05 21:18:33 +02:00
}
2006-08-05 01:04:03 +02:00
2006-08-05 21:18:33 +02:00
public boolean hasNext ( ) {
return indexRowIterator . hasNext ( ) ;
}
public Object next ( ) {
kelondroRow . Entry indexrow = ( kelondroRow . Entry ) indexRowIterator . next ( ) ;
2006-10-30 03:39:39 +01:00
assert ( indexrow ! = null ) ;
2006-08-05 21:18:33 +02:00
if ( indexrow = = null ) return null ;
try {
2006-10-30 03:39:39 +01:00
return new Object [ ] { indexrow . getColBytes ( 0 ) , getdelete ( indexrow , false ) } ;
2006-08-05 21:18:33 +02:00
} catch ( IOException e ) {
e . printStackTrace ( ) ;
return null ;
}
}
public void remove ( ) {
indexRowIterator . remove ( ) ;
}
2006-08-05 01:04:03 +02:00
2006-03-01 01:25:02 +01:00
}
2006-08-05 21:18:33 +02:00
2006-10-30 03:39:39 +01:00
public synchronized void close ( ) throws IOException {
this . index . close ( ) ;
Iterator i = arrays . values ( ) . iterator ( ) ;
while ( i . hasNext ( ) ) {
( ( kelondroFixedWidthArray ) i . next ( ) ) . close ( ) ;
2006-08-05 01:04:03 +02:00
}
}
2006-03-01 01:25:02 +01:00
public static void main ( String [ ] args ) {
2006-08-05 01:04:03 +02:00
// define payload structure
2006-12-06 04:02:57 +01:00
kelondroRow rowdef = new kelondroRow ( " byte[] a-10, byte[] b-80 " , kelondroNaturalOrder . naturalOrder , 0 ) ;
2006-08-05 01:04:03 +02:00
File path = new File ( args [ 0 ] ) ;
String filenameStub = args [ 1 ] ;
long preloadTime = 10000 ;
try {
// initialize collection index
kelondroCollectionIndex collectionIndex = new kelondroCollectionIndex (
path , filenameStub , 9 /*keyLength*/ ,
2007-03-06 23:43:32 +01:00
kelondroNaturalOrder . naturalOrder , preloadTime ,
2007-03-03 01:55:51 +01:00
4 /*loadfactor*/ , 7 , rowdef ) ;
2006-08-05 01:04:03 +02:00
// fill index with values
2006-12-06 04:02:57 +01:00
kelondroRowSet collection = new kelondroRowSet ( rowdef , 0 ) ;
2006-10-19 23:14:37 +02:00
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { " abc " . getBytes ( ) , " efg " . getBytes ( ) } ) ) ;
2006-08-05 01:04:03 +02:00
collectionIndex . put ( " erstes " . getBytes ( ) , collection ) ;
2007-04-03 14:10:12 +02:00
for ( int i = 0 ; i < = 170 ; i + + ) {
2006-12-06 04:02:57 +01:00
collection = new kelondroRowSet ( rowdef , 0 ) ;
2006-08-05 01:04:03 +02:00
for ( int j = 0 ; j < i ; j + + ) {
2006-10-19 23:14:37 +02:00
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { ( " abc " + j ) . getBytes ( ) , " xxx " . getBytes ( ) } ) ) ;
2006-08-05 01:04:03 +02:00
}
2006-08-05 21:18:33 +02:00
System . out . println ( " put key- " + i + " : " + collection . toString ( ) ) ;
2006-08-05 01:04:03 +02:00
collectionIndex . put ( ( " key- " + i ) . getBytes ( ) , collection ) ;
}
// extend collections with more values
2007-04-03 14:10:12 +02:00
for ( int i = 0 ; i < = 170 ; i + + ) {
2006-12-06 04:02:57 +01:00
collection = new kelondroRowSet ( rowdef , 0 ) ;
2006-08-05 01:04:03 +02:00
for ( int j = 0 ; j < i ; j + + ) {
2006-10-19 23:14:37 +02:00
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { ( " def " + j ) . getBytes ( ) , " xxx " . getBytes ( ) } ) ) ;
2006-08-05 01:04:03 +02:00
}
2007-02-26 16:49:23 +01:00
collectionIndex . merge ( new indexContainer ( " key- " + i , collection ) ) ;
2006-08-05 01:04:03 +02:00
}
// printout of index
2006-10-06 01:47:08 +02:00
collectionIndex . close ( ) ;
2007-04-05 12:14:48 +02:00
kelondroFlexTable index = new kelondroFlexTable ( path , filenameStub + " .index " , preloadTime , kelondroCollectionIndex . indexRow ( 9 , kelondroNaturalOrder . naturalOrder ) , true ) ;
2006-08-05 01:04:03 +02:00
index . print ( ) ;
index . close ( ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
}
2006-03-01 01:25:02 +01:00
}
}