2006-06-20 16:17:21 +02:00
// RowSet.java (formerly kelondroRowSet.java)
2008-07-20 19:14:51 +02:00
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
2006-06-20 16:17:21 +02:00
// first published 20.06.2006 on http://www.anomic.de
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-01-30 16:33:00 +01:00
package de.anomic.kelondro.index ;
2006-06-20 16:17:21 +02:00
2008-04-06 22:31:16 +02:00
import java.io.DataInput ;
2008-03-31 00:58:42 +02:00
import java.io.IOException ;
2006-06-20 16:17:21 +02:00
import java.util.Iterator ;
2007-02-25 22:06:26 +01:00
import java.util.List ;
2006-06-20 16:17:21 +02:00
import java.util.Random ;
2006-12-01 02:30:05 +01:00
2009-01-30 23:08:08 +01:00
import de.anomic.kelondro.order.Base64Order ;
import de.anomic.kelondro.order.CloneableIterator ;
import de.anomic.kelondro.order.NaturalOrder ;
2009-01-30 16:33:00 +01:00
public class RowSet extends RowCollection implements ObjectIndex , Iterable < Row . Entry > {
2006-06-20 16:17:21 +02:00
2009-03-13 11:07:04 +01:00
// Threshold for the number of not-yet-sorted rows (chunkcount - sortBound):
// once it is exceeded, put(), replace() and find() call sort() before continuing.
private static final int collectionReSortLimit = 300 ;
2006-08-05 01:04:03 +02:00
2009-01-30 16:33:00 +01:00
/**
 * Creates a row set from an existing row set.
 * All work is delegated to the superclass constructor that takes the same argument.
 *
 * @param rs the row set handed to the superclass constructor
 */
public RowSet(final RowSet rs) {
    super(rs);
}
2006-11-05 03:10:40 +01:00
2009-01-30 16:33:00 +01:00
/**
 * Creates a row set from an already filled byte cache.
 * The arguments are passed unchanged to the superclass constructor.
 *
 * @param rowdef      row definition; its objectOrder must not be null (asserted)
 * @param objectCount number of rows, as passed to the superclass
 * @param cache       raw row bytes, as passed to the superclass
 * @param sortBound   sort bound, as passed to the superclass
 */
public RowSet(final Row rowdef, final int objectCount, final byte[] cache, final int sortBound) {
    super(rowdef, objectCount, cache, sortBound);
    // a set needs an order on its keys to be searchable
    assert rowdef.objectOrder != null;
}
2009-01-30 16:33:00 +01:00
public RowSet ( final Row rowdef , final int objectCount ) {
2006-06-20 16:17:21 +02:00
super ( rowdef , objectCount ) ;
2007-05-09 19:59:36 +02:00
assert rowdef . objectOrder ! = null ;
2006-06-20 16:17:21 +02:00
}
2009-01-01 23:31:16 +01:00
/ * *
* import an exported collection
* @param rowdef
* @param exportedCollectionRowEnvironment
* @param columnInEnvironment
* /
2009-01-30 16:33:00 +01:00
public RowSet ( final Row rowdef , final Row . Entry exportedCollectionRowEnvironment ) {
2009-01-01 23:31:16 +01:00
super ( rowdef , exportedCollectionRowEnvironment ) ;
2007-05-09 19:59:36 +02:00
assert rowdef . objectOrder ! = null ;
2006-08-05 01:04:03 +02:00
}
2007-05-09 19:59:36 +02:00
2009-01-30 16:33:00 +01:00
public static RowSet importRowSet ( final DataInput is , final Row rowdef ) throws IOException {
2009-01-01 23:31:16 +01:00
final byte [ ] byte6 = new byte [ 6 ] ;
final int size = is . readInt ( ) ;
is . readFully ( byte6 ) ;
//short lastread = (short) kelondroNaturalOrder.decodeLong(byte2);
//short lastwrote = (short) kelondroNaturalOrder.decodeLong(byte2);
//String orderkey = new String(byte2);
final int orderbound = is . readInt ( ) ;
2008-08-02 14:12:04 +02:00
final byte [ ] chunkcache = new byte [ size * rowdef . objectsize ] ;
2008-04-02 15:18:23 +02:00
is . readFully ( chunkcache ) ;
2009-01-30 16:33:00 +01:00
return new RowSet ( rowdef , size , chunkcache , orderbound ) ;
2008-03-31 00:58:42 +02:00
}
2009-01-30 16:33:00 +01:00
public static RowSet importRowSet ( byte [ ] b , final Row rowdef ) {
2009-04-21 00:08:38 +02:00
assert b . length > = 14 : " b.length = " + b . length ;
2009-01-30 16:33:00 +01:00
final int size = ( int ) NaturalOrder . decodeLong ( b , 0 , 4 ) ;
final int orderbound = ( int ) NaturalOrder . decodeLong ( b , 10 , 4 ) ;
2009-01-01 23:31:16 +01:00
final byte [ ] chunkcache = new byte [ size * rowdef . objectsize ] ;
assert b . length - exportOverheadSize = = size * rowdef . objectsize ;
System . arraycopy ( b , 14 , chunkcache , 0 , chunkcache . length ) ;
2009-01-30 16:33:00 +01:00
return new RowSet ( rowdef , size , chunkcache , orderbound ) ;
2008-04-02 15:18:23 +02:00
}
2007-04-05 12:14:48 +02:00
/**
 * Resets this set by delegating to the superclass reset.
 */
public void reset() {
    super.reset();
}
2008-08-02 14:12:04 +02:00
public synchronized boolean has ( final byte [ ] key ) {
final int index = find ( key , 0 , key . length ) ;
2008-06-18 01:56:39 +02:00
return index > = 0 ;
2007-01-08 14:13:30 +01:00
}
2009-01-30 16:33:00 +01:00
public synchronized Row . Entry get ( final byte [ ] key ) {
2006-06-20 16:17:21 +02:00
return get ( key , 0 , key . length ) ;
}
2009-01-30 16:33:00 +01:00
private Row . Entry get ( final byte [ ] key , final int astart , final int alength ) {
2008-08-02 14:12:04 +02:00
final int index = find ( key , astart , alength ) ;
2009-01-30 16:33:00 +01:00
final Row . Entry entry = ( index > = 0 ) ? get ( index , true ) : null ;
2006-06-23 14:49:42 +02:00
return entry ;
2006-06-20 16:17:21 +02:00
}
2009-03-11 21:23:19 +01:00
public synchronized void put ( final List < Row . Entry > rows ) {
2009-01-30 16:33:00 +01:00
final Iterator < Row . Entry > i = rows . iterator ( ) ;
2007-12-27 18:56:59 +01:00
while ( i . hasNext ( ) ) put ( i . next ( ) ) ;
2007-02-25 22:06:26 +01:00
}
2009-03-11 21:23:19 +01:00
public synchronized void put ( final Row . Entry entry ) {
assert ( entry ! = null ) ;
assert ( entry . getPrimaryKeyBytes ( ) ! = null ) ;
// when reaching a specific amount of un-sorted entries, re-sort all
if ( ( this . chunkcount - this . sortBound ) > collectionReSortLimit ) {
sort ( ) ;
}
2009-03-13 17:52:31 +01:00
int index = find ( entry . bytes ( ) , 0 , super . rowdef . primaryKeyLength ) ;
2009-03-11 21:23:19 +01:00
if ( index < 0 ) {
super . addUnique ( entry ) ;
} else {
int sb = this . sortBound ; // save the sortBound, because it is not altered (we replace at the same place)
set ( index , entry ) ; // this may alter the sortBound, which we will revert in the next step
this . sortBound = sb ; // revert a sortBound altering
}
2006-10-13 01:14:41 +02:00
}
2009-03-11 21:23:19 +01:00
public synchronized Row . Entry replace ( final Row . Entry entry ) {
2006-10-24 15:48:16 +02:00
assert ( entry ! = null ) ;
2007-11-07 23:38:09 +01:00
assert ( entry . getPrimaryKeyBytes ( ) ! = null ) ;
2006-06-21 00:13:17 +02:00
int index = - 1 ;
2009-01-30 16:33:00 +01:00
Row . Entry oldentry = null ;
2008-01-23 21:18:36 +01:00
// when reaching a specific amount of un-sorted entries, re-sort all
if ( ( this . chunkcount - this . sortBound ) > collectionReSortLimit ) {
sort ( ) ;
}
2009-03-13 17:52:31 +01:00
index = find ( entry . bytes ( ) , 0 , super . rowdef . primaryKeyLength ) ;
2007-03-14 09:55:05 +01:00
if ( index < 0 ) {
super . addUnique ( entry ) ;
} else {
2008-04-24 15:31:55 +02:00
oldentry = get ( index , true ) ;
2009-01-29 17:08:24 +01:00
int sb = this . sortBound ; // save the sortBound, because it is not altered (we replace at the same place)
set ( index , entry ) ; // this may alter the sortBound, which we will revert in the next step
this . sortBound = sb ; // revert a sortBound altering
2006-06-21 00:13:17 +02:00
}
2006-06-23 14:49:42 +02:00
return oldentry ;
2006-06-21 00:13:17 +02:00
}
2006-06-22 17:42:09 +02:00
2009-03-11 16:10:38 +01:00
public synchronized long inc ( byte [ ] key , int col , long add , Row . Entry initrow ) {
final int index = find ( key , 0 , key . length ) ;
if ( index > = 0 ) {
// the entry existed before
final Row . Entry entry = get ( index , false ) ; // no clone necessary
long l = entry . incCol ( col , add ) ;
set ( index , entry ) ;
return l ;
} else if ( initrow ! = null ) {
// create new entry
super . addUnique ( initrow ) ;
return initrow . getColLong ( col ) ;
} else {
// if initrow == null just do nothing
// but return a Long.MIN_VALUE
return Long . MIN_VALUE ;
}
}
2009-01-30 16:33:00 +01:00
private synchronized Row . Entry remove ( final byte [ ] a , final int start , final int length ) {
2008-08-02 14:12:04 +02:00
final int index = find ( a , start , length ) ;
2007-03-14 09:55:05 +01:00
if ( index < 0 ) return null ;
2009-01-30 16:33:00 +01:00
final Row . Entry entry = super . get ( index , true ) ;
2009-01-09 01:06:36 +01:00
super . removeRow ( index , true ) ; // keep order of collection!
2008-06-15 00:51:47 +02:00
int findagainindex = 0 ;
2009-01-30 16:33:00 +01:00
assert ( findagainindex = find ( a , start , length ) ) < 0 : " remove: chunk found again at index position (after remove) " + findagainindex + " , index(before) = " + index + " , inset= " + NaturalOrder . arrayList ( super . chunkcache , super . rowdef . objectsize * findagainindex , length ) + " , searchkey= " + NaturalOrder . arrayList ( a , start , length ) ; // check if the remove worked
2007-03-14 09:55:05 +01:00
return entry ;
2006-06-22 17:42:09 +02:00
}
2006-10-16 02:27:25 +02:00
2009-01-06 10:38:08 +01:00
/ * *
* remove a byte [ ] from the set .
* if the entry was found , return the entry , but delete the entry from the set
* if the entry was not found , return null .
* /
2009-01-30 16:33:00 +01:00
public Row . Entry remove ( final byte [ ] a ) {
2008-07-05 02:35:20 +02:00
return remove ( a , 0 , a . length ) ;
2006-06-22 17:42:09 +02:00
}
2006-06-20 16:17:21 +02:00
2008-08-02 14:12:04 +02:00
private int find ( final byte [ ] a , final int astart , final int alength ) {
2006-06-20 16:17:21 +02:00
// returns the chunknumber; -1 if not found
2006-12-06 04:02:57 +01:00
if ( rowdef . objectOrder = = null ) return iterativeSearch ( a , astart , alength , 0 , this . chunkcount ) ;
2008-01-25 00:58:18 +01:00
2009-03-13 11:07:04 +01:00
if ( ( this . chunkcount - this . sortBound ) > collectionReSortLimit ) {
2008-01-25 00:58:18 +01:00
sort ( ) ;
}
2006-06-20 16:17:21 +02:00
2009-01-30 16:33:00 +01:00
if ( ( this . rowdef . objectOrder ! = null ) & & ( this . rowdef . objectOrder instanceof Base64Order ) & & ( this . sortBound > 4000 ) ) {
2007-11-12 02:14:51 +01:00
// first try to find in sorted area
2008-07-11 12:59:06 +02:00
assert this . rowdef . objectOrder . wellformed ( a , astart , alength ) : " not wellformed: " + new String ( a , astart , alength ) ;
2009-04-22 00:12:19 +02:00
final int p = binarySearch ( a , astart , alength ) ;
2007-11-12 02:14:51 +01:00
if ( p > = 0 ) return p ;
// then find in unsorted area
2009-03-13 10:30:19 +01:00
return iterativeSearch ( a , astart , alength , this . sortBound , this . chunkcount ) ;
2007-11-12 02:14:51 +01:00
} else {
// first try to find in sorted area
2008-08-02 14:12:04 +02:00
final int p = binarySearch ( a , astart , alength ) ;
2007-11-12 02:14:51 +01:00
if ( p > = 0 ) return p ;
2006-06-20 16:17:21 +02:00
2007-11-12 02:14:51 +01:00
// then find in unsorted area
return iterativeSearch ( a , astart , alength , this . sortBound , this . chunkcount ) ;
}
2006-06-20 16:17:21 +02:00
}
2008-08-02 14:12:04 +02:00
private int iterativeSearch ( final byte [ ] key , final int astart , final int alength , final int leftBorder , final int rightBound ) {
2007-11-12 02:14:51 +01:00
// returns the chunknumber
for ( int i = leftBorder ; i < rightBound ; i + + ) {
2009-03-13 10:30:19 +01:00
if ( match ( key , astart , alength , i ) ) return i ;
2007-11-12 02:14:51 +01:00
}
return - 1 ;
}
2008-08-02 14:12:04 +02:00
private int binarySearch ( final byte [ ] key , final int astart , final int alength ) {
2006-07-03 17:14:54 +02:00
// returns the exact position of the key if the key exists,
// or -1 if the key does not exist
2006-12-06 04:02:57 +01:00
assert ( rowdef . objectOrder ! = null ) ;
2006-06-20 16:17:21 +02:00
int l = 0 ;
int rbound = this . sortBound ;
int p = 0 ;
int d ;
while ( l < rbound ) {
p = l + ( ( rbound - l ) > > 1 ) ;
d = compare ( key , astart , alength , p ) ;
if ( d = = 0 ) return p ;
2007-11-12 02:14:51 +01:00
if ( d < 0 ) rbound = p ; else l = p + 1 ;
}
return - 1 ;
}
2006-06-20 16:17:21 +02:00
2009-01-02 13:33:06 +01:00
private int binaryPosition ( final byte [ ] key , final int astart , final int alength ) {
2006-07-03 17:14:54 +02:00
// returns the exact position of the key if the key exists,
// or a position of an entry that is greater than the key if the
// key does not exist
2006-12-06 04:02:57 +01:00
assert ( rowdef . objectOrder ! = null ) ;
2006-07-03 17:14:54 +02:00
int l = 0 ;
int rbound = this . sortBound ;
int p = 0 ;
int d ;
while ( l < rbound ) {
p = l + ( ( rbound - l ) > > 1 ) ;
d = compare ( key , astart , alength , p ) ;
if ( d = = 0 ) return p ;
2007-11-12 02:14:51 +01:00
if ( d < 0 ) rbound = p ; else l = p + 1 ;
2006-07-03 17:14:54 +02:00
}
return l ;
}
2006-06-20 16:17:21 +02:00
2007-12-27 18:56:59 +01:00
public synchronized Iterator < byte [ ] > keys ( ) {
sort ( ) ;
2009-01-09 01:06:36 +01:00
return super . keys ( true ) ;
2007-12-27 18:56:59 +01:00
}
2009-01-30 23:08:08 +01:00
public synchronized CloneableIterator < byte [ ] > keys ( final boolean up , final byte [ ] firstKey ) {
2007-12-27 18:56:59 +01:00
return new keyIterator ( up , firstKey ) ;
}
2009-01-30 23:08:08 +01:00
public class keyIterator implements CloneableIterator < byte [ ] > {
2007-12-27 18:56:59 +01:00
2008-08-02 14:12:04 +02:00
private final boolean up ;
private final byte [ ] first ;
private int p ;
2008-08-02 15:57:00 +02:00
final int bound ;
2007-12-27 18:56:59 +01:00
2008-08-02 14:12:04 +02:00
public keyIterator ( final boolean up , final byte [ ] firstKey ) {
2007-12-27 18:56:59 +01:00
// see that all elements are sorted
sort ( ) ;
this . up = up ;
this . first = firstKey ;
this . bound = sortBound ;
if ( first = = null ) {
p = 0 ;
} else {
p = binaryPosition ( first , 0 , first . length ) ; // check this to find bug in DHT selection enumeration
//System.out.println("binaryposition for key " + new String(firstKey) + " is " + p);
}
}
2008-08-02 14:12:04 +02:00
public keyIterator clone ( final Object second ) {
2007-12-27 18:56:59 +01:00
return new keyIterator ( up , ( byte [ ] ) second ) ;
}
public boolean hasNext ( ) {
if ( p < 0 ) return false ;
if ( p > = size ( ) ) return false ;
if ( up ) {
return p < bound ;
} else {
return p > = 0 ;
}
}
public byte [ ] next ( ) {
2008-08-02 14:12:04 +02:00
final byte [ ] key = getKey ( p ) ;
2007-12-27 18:56:59 +01:00
if ( up ) p + + ; else p - - ;
return key ;
}
public void remove ( ) {
throw new UnsupportedOperationException ( ) ;
}
}
2009-01-30 16:33:00 +01:00
public synchronized Iterator < Row . Entry > iterator ( ) {
2007-05-16 12:48:26 +02:00
// iterates kelondroRow.Entry - type entries
2007-03-14 09:55:05 +01:00
sort ( ) ;
2009-01-02 12:38:20 +01:00
return super . iterator ( ) ;
2006-07-04 16:47:27 +02:00
}
2009-01-30 23:08:08 +01:00
public synchronized CloneableIterator < Row . Entry > rows ( final boolean up , final byte [ ] firstKey ) {
2007-03-08 17:15:40 +01:00
return new rowIterator ( up , firstKey ) ;
2006-07-03 17:14:54 +02:00
}
2009-02-24 11:40:20 +01:00
/**
 * Creates a row iterator that walks upwards from position 0.
 *
 * @return a new rowIterator with up == true and no start key
 */
public synchronized CloneableIterator<Row.Entry> rows() {
    final rowIterator iterator = new rowIterator(true, null);
    return iterator;
}
2009-01-30 23:08:08 +01:00
public class rowIterator implements CloneableIterator < Row . Entry > {
2006-07-03 17:14:54 +02:00
2008-08-02 14:12:04 +02:00
private final boolean up ;
private final byte [ ] first ;
private int p ;
2008-08-02 15:57:00 +02:00
final int bound ;
2006-07-03 17:14:54 +02:00
2008-08-02 14:12:04 +02:00
public rowIterator ( final boolean up , final byte [ ] firstKey ) {
2006-11-20 01:27:02 +01:00
// see that all elements are sorted
2007-03-14 09:55:05 +01:00
sort ( ) ;
2006-07-03 17:14:54 +02:00
this . up = up ;
this . first = firstKey ;
this . bound = sortBound ;
if ( first = = null ) {
p = 0 ;
} else {
2007-05-09 19:59:36 +02:00
p = binaryPosition ( first , 0 , first . length ) ; // check this to find bug in DHT selection enumeration
2007-05-10 15:27:38 +02:00
//System.out.println("binaryposition for key " + new String(firstKey) + " is " + p);
2006-07-03 17:14:54 +02:00
}
}
2008-08-02 14:12:04 +02:00
public rowIterator clone ( final Object second ) {
2007-03-08 23:07:17 +01:00
return new rowIterator ( up , ( byte [ ] ) second ) ;
2007-03-08 17:15:40 +01:00
}
2006-07-03 17:14:54 +02:00
public boolean hasNext ( ) {
2007-04-26 11:51:51 +02:00
if ( p < 0 ) return false ;
if ( p > = size ( ) ) return false ;
2006-07-03 17:14:54 +02:00
if ( up ) {
return p < bound ;
} else {
return p > = 0 ;
}
}
2009-01-30 16:33:00 +01:00
public Row . Entry next ( ) {
final Row . Entry entry = get ( p , true ) ;
2006-07-03 17:14:54 +02:00
if ( up ) p + + ; else p - - ;
return entry ;
}
public void remove ( ) {
throw new UnsupportedOperationException ( ) ;
}
}
2009-01-02 13:33:06 +01:00
/ * *
* merge this row collection with another row collection .
* The resulting collection is sorted and does not contain any doubles , which are also removed during the merge .
* The new collection may be a copy of one of the old one , or can be an alteration of one of the input collections
* After this merge , none of the input collections should be used , because they can be altered
* @param c
* @return
* /
2009-01-30 16:33:00 +01:00
public RowSet merge ( RowSet c ) {
2009-03-31 21:17:45 +02:00
assert c ! = null ;
2009-01-02 13:33:06 +01:00
/ *
if ( this . isSorted ( ) & & this . size ( ) > = c . size ( ) ) {
return mergeInsert ( this , c ) ;
} * /
return mergeEnum ( this , c ) ;
}
/ *
private static kelondroRowSet mergeInsert ( kelondroRowSet sorted , kelondroRowCollection small ) {
assert sorted . rowdef = = small . rowdef ;
assert sorted . isSorted ( ) ;
assert small . size ( ) < = sorted . size ( ) ;
sorted . ensureSize ( sorted . size ( ) + small . size ( ) ) ;
for ( int i = 0 ; i < small . size ( ) ; i + + ) {
}
return sorted ;
}
* /
/ * *
* merge this row collection with another row collection using an simultanous iteration of the input collections
* the current collection is not altered in any way , the returned collection is a new collection with copied content .
* @param c
* @return
* /
2009-01-30 16:33:00 +01:00
protected static RowSet mergeEnum ( RowCollection c0 , RowCollection c1 ) {
2009-03-31 21:17:45 +02:00
assert c0 . rowdef = = c1 . rowdef : c0 . rowdef . toString ( ) + " != " + c1 . rowdef . toString ( ) ;
2009-01-30 16:33:00 +01:00
RowSet r = new RowSet ( c0 . rowdef , c0 . size ( ) + c1 . size ( ) ) ;
2009-01-02 13:33:06 +01:00
c0 . sort ( ) ;
c1 . sort ( ) ;
int c0i = 0 , c1i = 0 ;
int c0p , c1p ;
int o ;
final int objectsize = c0 . rowdef . objectsize ;
while ( c0i < c0 . size ( ) & & c1i < c1 . size ( ) ) {
c0p = c0i * objectsize ;
c1p = c1i * objectsize ;
o = c0 . rowdef . objectOrder . compare (
c0 . chunkcache , c0p , c0 . rowdef . primaryKeyLength ,
c1 . chunkcache , c1p , c0 . rowdef . primaryKeyLength ) ;
if ( o = = 0 ) {
r . addSorted ( c0 . chunkcache , c0p , objectsize ) ;
c0i + + ;
c1i + + ;
continue ;
}
if ( o < 0 ) {
r . addSorted ( c0 . chunkcache , c0p , objectsize ) ;
c0i + + ;
continue ;
}
if ( o > 0 ) {
r . addSorted ( c1 . chunkcache , c1p , objectsize ) ;
c1i + + ;
continue ;
}
}
while ( c0i < c0 . size ( ) ) {
r . addSorted ( c0 . chunkcache , c0i * objectsize , objectsize ) ;
c0i + + ;
}
while ( c1i < c1 . size ( ) ) {
r . addSorted ( c1 . chunkcache , c1i * objectsize , objectsize ) ;
c1i + + ;
}
return r ;
}
2006-07-03 17:14:54 +02:00
2008-08-02 14:12:04 +02:00
public static void main ( final String [ ] args ) {
2007-04-20 09:53:58 +02:00
// sort/uniq-test
2007-05-16 12:48:26 +02:00
/ *
2007-04-20 09:53:58 +02:00
kelondroRow rowdef = new kelondroRow ( " Cardinal key-4 {b256}, byte[] payload-1 " , kelondroNaturalOrder . naturalOrder , 0 ) ;
kelondroRowSet rs = new kelondroRowSet ( rowdef , 0 ) ;
Random random = new Random ( 0 ) ;
kelondroRow . Entry entry ;
for ( int i = 0 ; i < 10000000 ; i + + ) {
entry = rowdef . newEntry ( ) ;
entry . setCol ( 0 , Math . abs ( random . nextLong ( ) % 1000000 ) ) ;
entry . setCol ( 1 , " a " . getBytes ( ) ) ;
rs . addUnique ( entry ) ;
}
System . out . println ( " before sort, size = " + rs . size ( ) ) ;
rs . sort ( ) ;
System . out . println ( " after sort, before uniq, size = " + rs . size ( ) ) ;
rs . uniq ( 10000 ) ;
System . out . println ( " after uniq, size = " + rs . size ( ) ) ;
2007-05-16 12:48:26 +02:00
* /
2007-04-20 09:53:58 +02:00
2009-04-22 00:12:19 +02:00
final String [ ] test = {
" eins......xxxx " ,
" zwei......xxxx " ,
" drei......xxxx " ,
" vier......xxxx " ,
" fuenf.....xxxx " ,
" sechs.....xxxx " ,
" sieben....xxxx " ,
" acht......xxxx " ,
" neun......xxxx " ,
" zehn......xxxx " } ;
2009-03-13 17:52:31 +01:00
final RowSet d = new RowSet ( new Row ( " byte[] key-10, Cardinal x-4 {b256} " , NaturalOrder . naturalOrder ) , 0 ) ;
2007-05-16 12:48:26 +02:00
for ( int ii = 0 ; ii < test . length ; ii + + ) d . add ( test [ ii ] . getBytes ( ) ) ;
for ( int ii = 0 ; ii < test . length ; ii + + ) d . add ( test [ ii ] . getBytes ( ) ) ;
d . sort ( ) ;
2008-07-05 02:35:20 +02:00
d . remove ( " fuenf " . getBytes ( ) , 0 , 5 ) ;
2009-01-30 16:33:00 +01:00
final Iterator < Row . Entry > ii = d . iterator ( ) ;
2006-06-20 16:17:21 +02:00
String s ;
System . out . print ( " INPUT-ITERATOR: " ) ;
2009-01-30 16:33:00 +01:00
Row . Entry entry ;
2007-05-16 12:48:26 +02:00
while ( ii . hasNext ( ) ) {
2008-06-06 18:01:27 +02:00
entry = ii . next ( ) ;
s = new String ( entry . getColBytes ( 0 ) ) . trim ( ) ;
2006-06-20 16:17:21 +02:00
System . out . print ( s + " , " ) ;
2007-05-16 12:48:26 +02:00
if ( s . equals ( " drei " ) ) ii . remove ( ) ;
2006-06-20 16:17:21 +02:00
}
System . out . println ( " " ) ;
2007-05-16 12:48:26 +02:00
System . out . println ( " INPUT-TOSTRING: " + d . toString ( ) ) ;
d . sort ( ) ;
System . out . println ( " SORTED : " + d . toString ( ) ) ;
2007-11-09 01:51:38 +01:00
d . uniq ( ) ;
2007-05-16 12:48:26 +02:00
System . out . println ( " UNIQ : " + d . toString ( ) ) ;
d . trim ( false ) ;
System . out . println ( " TRIM : " + d . toString ( ) ) ;
2006-06-20 16:17:21 +02:00
2006-06-30 01:01:42 +02:00
/ *
2006-06-20 16:17:21 +02:00
// second test
c = new kelondroRowSet ( new kelondroRow ( new int [ ] { 10 , 3 } ) ) ;
c . setOrdering ( kelondroNaturalOrder . naturalOrder , 0 ) ;
Random rand = new Random ( 0 ) ;
long start = System . currentTimeMillis ( ) ;
long t , d = 0 ;
String w ;
for ( long k = 0 ; k < 60000 ; k + + ) {
t = System . currentTimeMillis ( ) ;
w = " a " + Long . toString ( rand . nextLong ( ) ) ;
2006-06-21 01:47:51 +02:00
c . add ( w . getBytes ( ) ) ;
2006-06-20 16:17:21 +02:00
if ( k % 10000 = = 0 )
System . out . println ( " added " + k + " entries in " +
( ( t - start ) / 1000 ) + " seconds, " +
( ( ( t - start ) > 1000 ) ? ( k / ( ( t - start ) / 1000 ) ) : k ) +
" entries/second, size = " + c . size ( ) ) ;
}
System . out . println ( " bevore sort: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
2006-06-22 17:42:09 +02:00
c . shape ( ) ;
2006-06-20 16:17:21 +02:00
System . out . println ( " after sort: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
c . uniq ( ) ;
System . out . println ( " after uniq: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
System . out . println ( " RESULT SIZE: " + c . size ( ) ) ;
System . out . println ( ) ;
// third test
c = new kelondroRowSet ( new kelondroRow ( new int [ ] { 10 , 3 } ) , 60000 ) ;
c . setOrdering ( kelondroNaturalOrder . naturalOrder , 0 ) ;
rand = new Random ( 0 ) ;
start = System . currentTimeMillis ( ) ;
d = 0 ;
for ( long k = 0 ; k < 60000 ; k + + ) {
t = System . currentTimeMillis ( ) ;
w = " a " + Long . toString ( rand . nextLong ( ) ) ;
2006-06-21 01:47:51 +02:00
if ( c . get ( w . getBytes ( ) , 0 , 10 ) = = null ) c . add ( w . getBytes ( ) ) ; else d + + ;
2006-06-20 16:17:21 +02:00
if ( k % 10000 = = 0 )
System . out . println ( " added " + k + " entries in " +
( ( t - start ) / 1000 ) + " seconds, " +
( ( ( t - start ) > 1000 ) ? ( k / ( ( t - start ) / 1000 ) ) : k ) +
" entries/second, " + d + " double, size = " + c . size ( ) +
" , sum = " + ( c . size ( ) + d ) ) ;
}
System . out . println ( " RESULT SIZE: " + c . size ( ) ) ;
2006-06-30 01:01:42 +02:00
* /
2006-10-04 00:55:59 +02:00
/ *
// performance test for put
2006-06-30 01:01:42 +02:00
long start = System . currentTimeMillis ( ) ;
2006-08-11 05:20:44 +02:00
kelondroRowSet c = new kelondroRowSet ( new kelondroRow ( " byte[] a-12, byte[] b-12 " ) , 0 ) ;
2006-06-30 01:01:42 +02:00
Random random = new Random ( 0 ) ;
byte [ ] key ;
for ( int i = 0 ; i < 100000 ; i + + ) {
key = randomHash ( random ) ;
c . put ( c . rowdef . newEntry ( new byte [ ] [ ] { key , key } ) ) ;
if ( i % 1000 = = 0 ) System . out . println ( i + " entries. " ) ;
}
System . out . println ( " RESULT SIZE: " + c . size ( ) ) ;
System . out . println ( " Time: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
2006-10-04 00:55:59 +02:00
* /
// remove test
2008-08-02 14:12:04 +02:00
final long start = System . currentTimeMillis ( ) ;
2009-03-13 17:52:31 +01:00
final RowSet c = new RowSet ( new Row ( " byte[] a-12, byte[] b-12 " , Base64Order . enhancedCoder ) , 0 ) ;
2006-10-04 00:55:59 +02:00
byte [ ] key ;
2008-08-02 14:12:04 +02:00
final int testsize = 5000 ;
final byte [ ] [ ] delkeys = new byte [ testsize / 5 ] [ ] ;
2007-05-16 12:48:26 +02:00
Random random = new Random ( 0 ) ;
2006-10-04 00:55:59 +02:00
for ( int i = 0 ; i < testsize ; i + + ) {
key = randomHash ( random ) ;
if ( i % 5 ! = 0 ) continue ;
delkeys [ i / 5 ] = key ;
}
random = new Random ( 0 ) ;
for ( int i = 0 ; i < testsize ; i + + ) {
key = randomHash ( random ) ;
c . put ( c . rowdef . newEntry ( new byte [ ] [ ] { key , key } ) ) ;
if ( i % 1000 = = 0 ) {
2008-07-05 02:35:20 +02:00
for ( int j = 0 ; j < delkeys . length ; j + + ) c . remove ( delkeys [ j ] ) ;
2007-03-14 09:55:05 +01:00
c . sort ( ) ;
2006-10-04 00:55:59 +02:00
}
}
2008-07-05 02:35:20 +02:00
for ( int j = 0 ; j < delkeys . length ; j + + ) c . remove ( delkeys [ j ] ) ;
2007-03-14 09:55:05 +01:00
c . sort ( ) ;
2006-10-04 00:55:59 +02:00
random = new Random ( 0 ) ;
for ( int i = 0 ; i < testsize ; i + + ) {
key = randomHash ( random ) ;
if ( i % 5 = = 0 ) continue ;
if ( c . get ( key ) = = null ) System . out . println ( " missing entry " + new String ( key ) ) ;
}
2007-03-14 09:55:05 +01:00
c . sort ( ) ;
2006-10-04 00:55:59 +02:00
System . out . println ( " RESULT SIZE: " + c . size ( ) ) ;
System . out . println ( " Time: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
2006-06-30 01:01:42 +02:00
}
public static byte [ ] randomHash ( final long r0 , final long r1 ) {
// a long can have 64 bit, but a 12-byte hash can have 6 * 12 = 72 bits
// so we construct a generic Hash using two long values
2009-01-30 16:33:00 +01:00
return ( Base64Order . enhancedCoder . encodeLong ( Math . abs ( r0 ) , 11 ) . substring ( 5 ) +
Base64Order . enhancedCoder . encodeLong ( Math . abs ( r1 ) , 11 ) . substring ( 5 ) ) . getBytes ( ) ;
2006-06-30 01:01:42 +02:00
}
2008-08-02 14:12:04 +02:00
public static byte [ ] randomHash ( final Random r ) {
2006-06-30 01:01:42 +02:00
return randomHash ( r . nextLong ( ) , r . nextLong ( ) ) ;
2006-06-20 16:17:21 +02:00
}
2007-03-06 23:43:32 +01:00
/**
 * @return always null; no file name is reported for this set
 */
public String filename() {
    return null;
}
2007-04-05 12:14:48 +02:00
2008-09-12 13:51:48 +02:00
/**
 * Does nothing, because there is no file that could be deleted.
 */
public void deleteOnExit() {
    // intentionally empty: there is no file
}
2009-01-31 02:06:56 +01:00
2006-06-20 16:17:21 +02:00
}