2009-10-10 01:32:08 +02:00
// RowCollection.java
2008-07-20 19:14:51 +02:00
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
2006-06-20 16:17:21 +02:00
// first published 12.01.2006 on http://www.anomic.de
//
2011-03-08 02:51:51 +01:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2006-06-20 16:17:21 +02:00
//
// LICENSE
2011-07-14 23:42:30 +02:00
//
2006-06-20 16:17:21 +02:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-10-10 01:32:08 +02:00
package net.yacy.kelondro.index ;
2006-06-20 16:17:21 +02:00
2007-03-03 01:55:51 +01:00
import java.io.File ;
2007-02-27 16:54:02 +01:00
import java.io.IOException ;
2008-01-20 02:22:46 +01:00
import java.util.ArrayList ;
2006-06-20 16:17:21 +02:00
import java.util.Iterator ;
2007-02-27 16:54:02 +01:00
import java.util.List ;
2007-11-09 01:51:38 +01:00
import java.util.Random ;
2008-03-20 00:47:24 +01:00
import java.util.concurrent.Callable ;
2006-06-20 16:17:21 +02:00
2011-05-27 10:24:54 +02:00
import net.yacy.cora.document.ASCII ;
2011-03-07 21:36:40 +01:00
import net.yacy.cora.document.UTF8 ;
2011-07-16 21:13:30 +02:00
import net.yacy.cora.storage.Array ;
import net.yacy.cora.storage.Sortable ;
2011-07-16 12:08:43 +02:00
import net.yacy.kelondro.index.Row.Entry ;
2009-10-10 01:13:30 +02:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Base64Order ;
import net.yacy.kelondro.order.ByteOrder ;
import net.yacy.kelondro.order.NaturalOrder ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.FileUtils ;
import net.yacy.kelondro.util.MemoryControl ;
import net.yacy.kelondro.util.kelondroException ;
2009-10-10 01:13:30 +02:00
2006-10-24 15:48:16 +02:00
2011-07-16 12:08:43 +02:00
public class RowCollection implements Sortable < Row . Entry > , Iterable < Row . Entry > , Cloneable {
2006-06-20 16:17:21 +02:00
2009-10-27 16:25:48 +01:00
public static final long growfactorLarge100 = 140L ;
public static final long growfactorSmall100 = 120L ;
2007-11-09 16:34:11 +01:00
private static final int isortlimit = 20 ;
2011-07-14 23:42:30 +02:00
2010-04-15 15:22:59 +02:00
private static final int exp_chunkcount = 0 ;
private static final int exp_last_read = 1 ;
private static final int exp_last_wrote = 2 ;
private static final int exp_order_type = 3 ;
private static final int exp_order_bound = 4 ;
private static final int exp_collection = 5 ;
2011-07-14 23:42:30 +02:00
2011-05-16 00:57:31 +02:00
protected final Row rowdef ;
2009-10-12 19:37:12 +02:00
protected byte [ ] chunkcache ;
protected int chunkcount ;
protected int sortBound ;
protected long lastTimeWrote ;
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
/**
 * Creates a collection that takes over the complete state of another one.
 * The chunk cache is NOT copied: both objects refer to the same byte array.
 *
 * @param rc the collection whose state is adopted
 */
protected RowCollection(final RowCollection rc) {
    this.rowdef        = rc.rowdef;
    this.chunkcache    = rc.chunkcache; // shared reference, no copy
    this.chunkcount    = rc.chunkcount;
    this.sortBound     = rc.sortBound;
    this.lastTimeWrote = rc.lastTimeWrote;
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
/**
 * Creates an empty collection with a zero-length chunk cache.
 *
 * @param rowdef layout of the rows that will be stored
 */
protected RowCollection(final Row rowdef) {
    this.rowdef = rowdef;
    this.chunkcache = new byte[0];
    this.chunkcount = 0;
    this.sortBound = 0;
    this.lastTimeWrote = System.currentTimeMillis();
}
2011-07-14 23:42:30 +02:00
2010-01-09 01:08:16 +01:00
/**
 * Creates an empty collection and reserves space for a number of rows.
 *
 * @param rowdef      layout of the rows that will be stored
 * @param objectCount number of rows for which space is reserved up-front
 * @throws RowSpaceExceededException if the space cannot be allocated
 */
public RowCollection(final Row rowdef, final int objectCount) throws RowSpaceExceededException {
    this(rowdef);
    // the collection stays empty; only the backing array is enlarged
    ensureSize(objectCount);
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
/**
 * Wraps an existing byte array as chunk cache. The array is used as-is (no copy).
 *
 * @param rowdef      layout of the rows inside the cache
 * @param objectCount number of rows that the cache contains
 * @param cache       the backing array to use
 * @param sortBound   number of leading rows that are already sorted
 */
protected RowCollection(final Row rowdef, final int objectCount, final byte[] cache, final int sortBound) {
    this.rowdef = rowdef;
    this.chunkcache = cache;
    this.chunkcount = objectCount;
    this.sortBound = sortBound;
    this.lastTimeWrote = System.currentTimeMillis();
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
/**
 * Restores a collection from its exported form (see exportCollection()).
 * The exported collection is read from column 1 of the given environment entry.
 * A chunk count or sort bound that is too large for the stored data is
 * corrected and a warning is logged.
 *
 * @param rowdef layout of the rows inside the collection
 * @param exportedCollectionRowEnvironment entry whose column 1 holds the exported collection
 * @throws kelondroException if the stored order signature differs from the order of rowdef
 */
protected RowCollection(final Row rowdef, final Row.Entry exportedCollectionRowEnvironment) {
    // the payload length is the cell width minus the fixed export header
    final int chunkcachelength = exportedCollectionRowEnvironment.cellwidth(1) - (int) exportOverheadSize;
    final Row.Entry exportedCollection = exportRow(chunkcachelength).newEntry(exportedCollectionRowEnvironment, 1);

    this.rowdef = rowdef;

    // number of rows; must not exceed what fits into the payload
    this.chunkcount = (int) exportedCollection.getColLong(exp_chunkcount);
    final int maxChunks = chunkcachelength / rowdef.objectsize;
    if (this.chunkcount > maxChunks) {
        Log.logWarning(" RowCollection ", " corrected wrong chunkcount; chunkcount = " + this.chunkcount + " , chunkcachelength = " + chunkcachelength + " , rowdef.objectsize = " + rowdef.objectsize);
        this.chunkcount = maxChunks; // patch problem
    }

    // the write date is stored as days since 2000; convert back to milliseconds
    this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day;

    // find the order that was used when the collection was exported
    final String sortOrderKey = exportedCollection.getColString(exp_order_type);
    ByteOrder oldOrder = null;
    if (sortOrderKey != null && !sortOrderKey.equals(" __ ")) {
        oldOrder = NaturalOrder.bySignature(sortOrderKey);
        if (oldOrder == null) {
            oldOrder = Base64Order.bySignature(sortOrderKey);
        }
    }
    final boolean bothOrdersKnown = rowdef.objectOrder != null && oldOrder != null;
    if (bothOrdersKnown && !rowdef.objectOrder.signature().equals(oldOrder.signature())) {
        throw new kelondroException(" old collection order does not match with new order; objectOrder.signature = " + rowdef.objectOrder.signature() + " , oldOrder.signature = " + oldOrder.signature());
    }

    // sorted prefix; cannot be longer than the collection itself
    this.sortBound = (int) exportedCollection.getColLong(exp_order_bound);
    if (this.sortBound > this.chunkcount) {
        Log.logWarning(" RowCollection ", " corrected wrong sortBound; sortBound = " + this.sortBound + " , chunkcount = " + this.chunkcount);
        this.sortBound = this.chunkcount;
    }

    this.chunkcache = exportedCollection.getColBytes(exp_collection, false);
}
2011-07-14 23:42:30 +02:00
/**
 * Creates a collection from explicit state. In contrast to the other
 * constructors the given chunk cache is copied, so the new collection
 * does not share its backing array with the caller.
 *
 * @param rowdef        layout of the rows inside the cache
 * @param chunkcache    source array; a copy of it becomes the backing array
 * @param chunkcount    number of rows inside the cache
 * @param sortBound     number of leading rows that are already sorted
 * @param lastTimeWrote time of the last modification in milliseconds
 */
protected RowCollection(final Row rowdef, final byte[] chunkcache, final int chunkcount, final int sortBound, final long lastTimeWrote) {
    this.rowdef = rowdef;
    this.chunkcache = chunkcache.clone(); // private copy of identical length and content
    this.chunkcount = chunkcount;
    this.sortBound = sortBound;
    this.lastTimeWrote = lastTimeWrote;
}
2011-07-14 23:42:30 +02:00
2010-04-15 15:22:59 +02:00
/**
 * Produces an independent copy of this collection. The copy has its own
 * chunk cache; the row definition object is shared.
 *
 * @return a new RowCollection with the same content and state
 */
public RowCollection clone() {
    final RowCollection copy = new RowCollection(this.rowdef, this.chunkcache, this.chunkcount, this.sortBound, this.lastTimeWrote);
    return copy;
}
2007-04-05 12:14:48 +02:00
/**
 * Drops all rows by replacing the chunk cache with an empty array and
 * zeroing the counters. The last-write time is left untouched.
 */
public void reset() {
    this.chunkcount = 0;
    this.sortBound = 0;
    this.chunkcache = new byte[0];
}
2011-07-14 23:42:30 +02:00
2010-08-03 06:58:48 +02:00
/ * *
* calculate the memory that the structure occupies in ram
* @return number of bytes in use
* /
public long mem ( ) {
return this . chunkcache . length ;
}
2011-07-14 23:42:30 +02:00
2009-01-30 16:33:00 +01:00
private static final Row exportMeasureRow = exportRow ( 0 /* no relevance */ ) ;
2007-04-03 14:10:12 +02:00
2009-01-30 16:33:00 +01:00
public static final int sizeOfExportedCollectionRows ( final Row . Entry exportedCollectionRowEnvironment , final int columnInEnvironment ) {
final Row . Entry exportedCollectionEntry = exportMeasureRow . newEntry ( exportedCollectionRowEnvironment , columnInEnvironment ) ;
2008-08-02 14:12:04 +02:00
final int chunkcount = ( int ) exportedCollectionEntry . getColLong ( exp_chunkcount ) ;
2007-03-27 10:21:03 +02:00
return chunkcount ;
}
2011-07-14 23:42:30 +02:00
2006-08-05 01:04:03 +02:00
private static final long day = 1000 * 60 * 60 * 24 ;
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
private static int daysSince2000 ( final long time ) {
2006-08-05 01:04:03 +02:00
return ( int ) ( time / day ) - 10957 ;
}
2011-07-14 23:42:30 +02:00
2009-08-28 15:28:11 +02:00
// The five header columns are created lazily inside exportRow(int).
// NOTE(review): the original author reported that declaring them as initialised
// "static final" fields left them null. A likely cause is that exportMeasureRow,
// which is declared further up in this class, calls exportRow(int) during class
// initialisation, i.e. before initialisers placed here would have run - confirm.
private static Column exportColumn0, exportColumn1, exportColumn2, exportColumn3, exportColumn4;

/** Byte size of the export header: 4 + 2 + 2 + 2 + 4 for the five header columns. */
protected static final long exportOverheadSize = 14;

/**
 * Builds the row definition of an exported collection: five header columns
 * (size, lastread, lastwrote, orderkey, orderbound) followed by one
 * byte[] column that carries the rows.
 *
 * @param chunkcachelength width in bytes of the trailing collection column
 * @return the row definition, ordered by NaturalOrder.naturalOrder
 */
private static Row exportRow(final int chunkcachelength) {
    if (exportColumn0 == null) {
        exportColumn0 = new Column(" int size-4 {b256} ");
    }
    if (exportColumn1 == null) {
        exportColumn1 = new Column(" short lastread-2 {b256} "); // as daysSince2000
    }
    if (exportColumn2 == null) {
        exportColumn2 = new Column(" short lastwrote-2 {b256} "); // as daysSince2000
    }
    if (exportColumn3 == null) {
        exportColumn3 = new Column(" byte[] orderkey-2 ");
    }
    if (exportColumn4 == null) {
        exportColumn4 = new Column(" int orderbound-4 {b256} ");
    }

    final Column[] columns = new Column[] {
        exportColumn0, exportColumn1, exportColumn2, exportColumn3, exportColumn4,
        new Column(" byte[] collection- " + chunkcachelength)
    };
    final Row er = new Row(columns, NaturalOrder.naturalOrder);
    assert er.objectsize == chunkcachelength + exportOverheadSize;
    return er;
}
2011-07-14 23:42:30 +02:00
2007-03-14 09:55:05 +01:00
public synchronized byte [ ] exportCollection ( ) {
2006-08-05 01:04:03 +02:00
// returns null if the collection is empty
2010-04-20 15:45:22 +02:00
sort ( ) ; // experimental; supervise CPU load
2010-04-28 00:22:16 +02:00
//uniq();
//trim();
2010-04-20 15:45:22 +02:00
assert this . sortBound = = this . chunkcount ; // on case the collection is sorted
2011-07-14 23:42:30 +02:00
assert size ( ) * this . rowdef . objectsize < = this . chunkcache . length : " this.size() = " + size ( ) + " , objectsize = " + this . rowdef . objectsize + " , chunkcache.length = " + this . chunkcache . length ;
final Row row = exportRow ( size ( ) * this . rowdef . objectsize ) ;
2009-01-30 16:33:00 +01:00
final Row . Entry entry = row . newEntry ( ) ;
2011-07-14 23:42:30 +02:00
assert ( this . sortBound < = this . chunkcount ) : " sortBound = " + this . sortBound + " , chunkcount = " + this . chunkcount ;
assert ( this . chunkcount < = this . chunkcache . length / this . rowdef . objectsize ) : " chunkcount = " + this . chunkcount + " , chunkcache.length = " + this . chunkcache . length + " , rowdef.objectsize = " + this . rowdef . objectsize ;
2006-10-26 15:50:50 +02:00
entry . setCol ( exp_chunkcount , this . chunkcount ) ;
2009-03-18 23:19:08 +01:00
entry . setCol ( exp_last_read , daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
2006-08-11 05:20:44 +02:00
entry . setCol ( exp_last_wrote , daysSince2000 ( this . lastTimeWrote ) ) ;
2011-05-27 10:24:54 +02:00
entry . setCol ( exp_order_type , ( this . rowdef . objectOrder = = null ) ? ASCII . getBytes ( " __ " ) : UTF8 . getBytes ( this . rowdef . objectOrder . signature ( ) ) ) ;
2006-08-11 05:20:44 +02:00
entry . setCol ( exp_order_bound , this . sortBound ) ;
2007-03-03 01:55:51 +01:00
entry . setCol ( exp_collection , this . chunkcache ) ;
2006-08-05 01:04:03 +02:00
return entry . bytes ( ) ;
}
2011-07-14 23:42:30 +02:00
2008-08-02 14:12:04 +02:00
public void saveCollection ( final File file ) throws IOException {
2009-01-31 02:06:56 +01:00
FileUtils . copy ( exportCollection ( ) , file ) ;
2007-03-03 01:55:51 +01:00
}
2006-11-19 21:05:25 +01:00
2009-01-30 16:33:00 +01:00
public Row row ( ) {
2006-06-30 14:54:19 +02:00
return this . rowdef ;
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
private final long neededSpaceForEnsuredSize ( final int elements , final boolean forcegc ) {
2009-08-28 15:28:11 +02:00
assert elements > 0 : " elements = " + elements ;
2011-07-14 23:42:30 +02:00
final long needed = elements * this . rowdef . objectsize ;
if ( this . chunkcache . length > = needed ) return 0 ;
2009-08-28 15:28:11 +02:00
assert needed > 0 : " needed = " + needed ;
2009-10-27 16:25:48 +01:00
long allocram = needed * growfactorLarge100 / 100L ;
2011-07-14 23:42:30 +02:00
allocram - = allocram % this . rowdef . objectsize ;
2009-10-27 16:25:48 +01:00
assert allocram > 0 : " elements = " + elements + " , new = " + allocram ;
2010-02-23 16:59:58 +01:00
if ( allocram < = Integer . MAX_VALUE & & MemoryControl . request ( allocram , false ) ) return allocram ;
2009-10-27 16:25:48 +01:00
allocram = needed * growfactorSmall100 / 100L ;
2011-07-14 23:42:30 +02:00
allocram - = allocram % this . rowdef . objectsize ;
2010-04-21 19:59:22 +02:00
assert allocram > = 0 : " elements = " + elements + " , new = " + allocram ;
2010-02-23 16:59:58 +01:00
if ( allocram < = Integer . MAX_VALUE & & MemoryControl . request ( allocram , forcegc ) ) return allocram ;
2009-10-27 16:25:48 +01:00
return needed ;
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
private final void ensureSize ( final int elements ) throws RowSpaceExceededException {
2010-01-09 01:08:16 +01:00
if ( elements = = 0 ) return ;
2010-02-23 16:59:58 +01:00
final long allocram = neededSpaceForEnsuredSize ( elements , true ) ;
2009-10-27 16:25:48 +01:00
if ( allocram = = 0 ) return ;
2011-07-14 23:42:30 +02:00
assert this . chunkcache . length < elements * this . rowdef . objectsize : " wrong alloc computation (1): elements * rowdef.objectsize = " + ( elements * this . rowdef . objectsize ) + " , chunkcache.length = " + this . chunkcache . length ;
assert allocram > this . chunkcache . length : " wrong alloc computation (2): allocram = " + allocram + " , chunkcache.length = " + this . chunkcache . length ;
2011-02-15 15:26:25 +01:00
if ( allocram > Integer . MAX_VALUE | | ! MemoryControl . request ( allocram + 32 , true ) )
throw new RowSpaceExceededException ( allocram + 32 , " RowCollection grow " ) ;
2009-12-10 00:27:26 +01:00
try {
2010-02-23 16:59:58 +01:00
final byte [ ] newChunkcache = new byte [ ( int ) allocram ] ; // increase space
2011-07-14 23:42:30 +02:00
System . arraycopy ( this . chunkcache , 0 , newChunkcache , 0 , this . chunkcache . length ) ;
this . chunkcache = newChunkcache ;
} catch ( final OutOfMemoryError e ) {
2010-04-13 13:39:54 +02:00
// lets try again after a forced gc()
System . gc ( ) ;
try {
final byte [ ] newChunkcache = new byte [ ( int ) allocram ] ; // increase space
2011-07-14 23:42:30 +02:00
System . arraycopy ( this . chunkcache , 0 , newChunkcache , 0 , this . chunkcache . length ) ;
this . chunkcache = newChunkcache ;
} catch ( final OutOfMemoryError ee ) {
2010-04-13 13:39:54 +02:00
throw new RowSpaceExceededException ( allocram , " RowCollection grow after OutOfMemoryError " + ee . getMessage ( ) ) ;
}
2009-12-10 00:27:26 +01:00
}
2007-05-16 19:52:11 +02:00
}
2011-07-14 23:42:30 +02:00
2009-02-16 00:35:59 +01:00
/ * *
* compute the needed memory in case of a cache extension . That is , if the cache is full and must
* be copied into a new cache which is larger . In such a case the Collection needs more than the double size
2009-08-28 15:28:11 +02:00
* than is necessary to store the data . This method computes the extra memory that is needed to perform this task .
2009-02-16 00:35:59 +01:00
* @return
* /
2010-04-16 18:07:19 +02:00
protected final long memoryNeededForGrow ( ) {
2011-07-14 23:42:30 +02:00
return neededSpaceForEnsuredSize ( this . chunkcount + 1 , false ) ;
2007-03-21 14:26:18 +01:00
}
2011-07-14 23:42:30 +02:00
2011-07-16 12:08:43 +02:00
/**
 * Compares two entries by delegating to the compareTo method of the first one.
 *
 * @return the result of o1.compareTo(o2)
 */
@Override
public int compare(final Entry o1, final Entry o2) {
    final int order = o1.compareTo(o2);
    return order;
}
/**
 * Provides a fresh entry of this collection's row definition; swap() uses
 * the bytes of such an entry as scratch space.
 *
 * @return a new, empty entry
 */
@Override
public Entry buffer() {
    final Row definition = row();
    return definition.newEntry();
}
@Override
public void swap ( final int i , final int j , final Entry buffer ) {
if ( i = = j ) return ;
final byte [ ] swapspace = buffer . bytes ( ) ;
System . arraycopy ( this . chunkcache , this . rowdef . objectsize * i , swapspace , 0 , this . rowdef . objectsize ) ;
System . arraycopy ( this . chunkcache , this . rowdef . objectsize * j , this . chunkcache , this . rowdef . objectsize * i , this . rowdef . objectsize ) ;
System . arraycopy ( swapspace , 0 , this . chunkcache , this . rowdef . objectsize * j , this . rowdef . objectsize ) ;
}
2010-04-28 00:22:16 +02:00
protected synchronized void trim ( ) {
2011-07-14 23:42:30 +02:00
if ( this . chunkcache . length = = 0 ) return ;
final long needed = this . chunkcount * this . rowdef . objectsize ;
assert needed < = this . chunkcache . length ;
if ( needed > = this . chunkcache . length )
2007-03-14 09:55:05 +01:00
return ; // in case that the growfactor causes that the cache would
// grow instead of shrink, simply ignore the growfactor
2009-01-30 16:33:00 +01:00
if ( MemoryControl . available ( ) + 1000 < needed )
2007-03-14 09:55:05 +01:00
return ; // if the swap buffer is not available, we must give up.
2008-04-24 17:09:06 +02:00
// This is not critical. Otherwise we provoke a serious
2007-03-14 09:55:05 +01:00
// problem with OOM
2010-01-10 02:40:26 +01:00
final byte [ ] newChunkcache = new byte [ ( int ) needed ] ;
2011-07-14 23:42:30 +02:00
System . arraycopy ( this . chunkcache , 0 , newChunkcache , 0 , Math . min ( this . chunkcache . length , newChunkcache . length ) ) ;
this . chunkcache = newChunkcache ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2006-06-21 00:13:17 +02:00
public final long lastWrote ( ) {
2011-07-14 23:42:30 +02:00
return this . lastTimeWrote ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
protected synchronized final byte [ ] getKey ( final int index ) {
2007-12-27 18:56:59 +01:00
assert ( index > = 0 ) : " get: access with index " + index + " is below zero " ;
2011-07-14 23:42:30 +02:00
assert ( index < this . chunkcount ) : " get: access with index " + index + " is above chunkcount " + this . chunkcount + " ; sortBound = " + this . sortBound ;
assert ( index * this . rowdef . objectsize < this . chunkcache . length ) ;
if ( ( this . chunkcache = = null ) | | ( this . rowdef = = null ) ) return null ; // case may appear during shutdown
if ( index > = this . chunkcount ) return null ;
if ( ( index + 1 ) * this . rowdef . objectsize > this . chunkcache . length ) return null ; // the whole chunk does not fit into the chunkcache
2010-04-28 10:38:57 +02:00
final byte [ ] b = new byte [ this . rowdef . primaryKeyLength ] ;
2011-07-14 23:42:30 +02:00
System . arraycopy ( this . chunkcache , index * this . rowdef . objectsize , b , 0 , b . length ) ;
2007-12-27 18:56:59 +01:00
return b ;
}
2011-07-14 23:42:30 +02:00
2009-01-30 16:33:00 +01:00
public synchronized final Row . Entry get ( final int index , final boolean clone ) {
2006-10-21 12:50:30 +02:00
assert ( index > = 0 ) : " get: access with index " + index + " is below zero " ;
2011-07-14 23:42:30 +02:00
assert ( index < this . chunkcount ) : " get: access with index " + index + " is above chunkcount " + this . chunkcount + " ; sortBound = " + this . sortBound ;
assert ( this . chunkcache ! = null & & index * this . rowdef . objectsize < this . chunkcache . length ) ;
assert this . sortBound < = this . chunkcount : " sortBound = " + this . sortBound + " , chunkcount = " + this . chunkcount ;
if ( ( this . chunkcache = = null ) | | ( this . rowdef = = null ) ) return null ; // case may appear during shutdown
2009-01-30 16:33:00 +01:00
Row . Entry entry ;
2011-07-14 23:42:30 +02:00
final int addr = index * this . rowdef . objectsize ;
if ( index > = this . chunkcount ) return null ;
if ( addr + this . rowdef . objectsize > this . chunkcache . length ) return null ; // the whole chunk does not fit into the chunkcache
entry = this . rowdef . newEntry ( this . chunkcache , addr , clone ) ;
2008-04-24 17:09:06 +02:00
return entry ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public synchronized final void set ( final int index , final Row . Entry a ) throws RowSpaceExceededException {
2007-09-04 01:43:55 +02:00
assert ( index > = 0 ) : " set: access with index " + index + " is below zero " ;
ensureSize ( index + 1 ) ;
2011-07-14 23:42:30 +02:00
final byte [ ] column = a . bytes ( ) ;
2010-04-28 10:38:57 +02:00
assert a . cellwidth ( 0 ) = = this . rowdef . primaryKeyLength ;
assert column . length > = this . rowdef . primaryKeyLength ;
final boolean sameKey = match ( column , 0 , index ) ;
2009-01-29 17:42:01 +01:00
//if (sameKey) System.out.print("$");
2011-07-14 23:42:30 +02:00
a . writeToArray ( this . chunkcache , index * this . rowdef . objectsize ) ;
2009-01-01 23:31:16 +01:00
if ( index > = this . chunkcount ) this . chunkcount = index + 1 ;
2009-01-29 17:42:01 +01:00
if ( ! sameKey & & index < this . sortBound ) this . sortBound = index ;
2006-06-20 16:17:21 +02:00
this . lastTimeWrote = System . currentTimeMillis ( ) ;
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public final void insertUnique ( final int index , final Row . Entry a ) throws RowSpaceExceededException {
2007-09-04 01:43:55 +02:00
assert ( a ! = null ) ;
2011-07-14 23:42:30 +02:00
if ( index < this . chunkcount ) {
2007-09-04 01:43:55 +02:00
// make room
2011-07-14 23:42:30 +02:00
ensureSize ( this . chunkcount + 1 ) ;
System . arraycopy ( this . chunkcache , this . rowdef . objectsize * index , this . chunkcache , this . rowdef . objectsize * ( index + 1 ) , ( this . chunkcount - index ) * this . rowdef . objectsize ) ;
this . chunkcount + + ;
2007-09-04 01:43:55 +02:00
}
// insert entry into gap
set ( index , a ) ;
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public synchronized void addUnique ( final Row . Entry row ) throws RowSpaceExceededException {
2008-08-02 14:12:04 +02:00
final byte [ ] r = row . bytes ( ) ;
2008-11-24 00:55:08 +01:00
addUnique ( r , 0 , r . length ) ;
2006-06-20 16:17:21 +02:00
}
2006-10-19 23:14:37 +02:00
2009-12-10 00:27:26 +01:00
public synchronized void addUnique ( final List < Row . Entry > rows ) throws RowSpaceExceededException {
2007-05-16 12:48:26 +02:00
assert this . sortBound = = 0 : " sortBound = " + this . sortBound + " , chunkcount = " + this . chunkcount ;
2009-01-30 16:33:00 +01:00
final Iterator < Row . Entry > i = rows . iterator ( ) ;
2008-11-24 00:55:08 +01:00
while ( i . hasNext ( ) ) addUnique ( i . next ( ) ) ;
2007-02-27 16:54:02 +01:00
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public synchronized void add ( final byte [ ] a ) throws RowSpaceExceededException {
2009-04-22 00:12:19 +02:00
assert a . length = = this . rowdef . objectsize : " a.length = " + a . length + " , objectsize = " + this . rowdef . objectsize ;
2006-10-30 03:39:39 +01:00
addUnique ( a , 0 , a . length ) ;
2006-06-21 01:47:51 +02:00
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
private final void addUnique ( final byte [ ] a , final int astart , final int alength ) throws RowSpaceExceededException {
2006-10-24 15:48:16 +02:00
assert ( a ! = null ) ;
2008-08-06 21:43:12 +02:00
assert ( astart > = 0 ) & & ( astart < a . length ) : " astart = " + astart ;
2009-01-31 00:33:47 +01:00
assert ( ! ( Log . allZero ( a , astart , alength ) ) ) : " a = " + NaturalOrder . arrayList ( a , astart , alength ) ;
2006-10-24 15:48:16 +02:00
assert ( alength > 0 ) ;
assert ( astart + alength < = a . length ) ;
2011-07-14 23:42:30 +02:00
assert alength = = this . rowdef . objectsize : " alength = " + alength + " , rowdef.objectsize = " + this . rowdef . objectsize ;
final int l = Math . min ( this . rowdef . objectsize , Math . min ( alength , a . length - astart ) ) ;
ensureSize ( this . chunkcount + 1 ) ;
System . arraycopy ( a , astart , this . chunkcache , this . rowdef . objectsize * this . chunkcount , l ) ;
this . chunkcount + + ;
2009-01-01 23:31:16 +01:00
// if possible, increase the sortbound value to suppress unnecessary sorting
if ( this . chunkcount = = 1 ) {
assert this . sortBound = = 0 ;
this . sortBound = 1 ;
} else if (
2011-07-14 23:42:30 +02:00
this . sortBound + 1 = = this . chunkcount & &
this . rowdef . objectOrder . compare ( this . chunkcache , this . rowdef . objectsize * ( this . chunkcount - 2 ) ,
this . chunkcache , this . rowdef . objectsize * ( this . chunkcount - 1 ) , this . rowdef . primaryKeyLength ) = = - 1 ) {
this . sortBound = this . chunkcount ;
2009-01-01 23:31:16 +01:00
}
2006-06-20 16:17:21 +02:00
this . lastTimeWrote = System . currentTimeMillis ( ) ;
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
protected final void addSorted ( final byte [ ] a , final int astart , final int alength ) throws RowSpaceExceededException {
2009-01-02 12:38:20 +01:00
assert ( a ! = null ) ;
assert ( astart > = 0 ) & & ( astart < a . length ) : " astart = " + astart ;
2009-01-31 00:33:47 +01:00
assert ( ! ( Log . allZero ( a , astart , alength ) ) ) : " a = " + NaturalOrder . arrayList ( a , astart , alength ) ;
2009-01-02 12:38:20 +01:00
assert ( alength > 0 ) ;
assert ( astart + alength < = a . length ) ;
2011-07-14 23:42:30 +02:00
assert alength = = this . rowdef . objectsize : " alength = " + alength + " , rowdef.objectsize = " + this . rowdef . objectsize ;
final int l = Math . min ( this . rowdef . objectsize , Math . min ( alength , a . length - astart ) ) ;
ensureSize ( this . chunkcount + 1 ) ;
System . arraycopy ( a , astart , this . chunkcache , this . rowdef . objectsize * this . chunkcount , l ) ;
2009-01-02 12:38:20 +01:00
this . chunkcount + + ;
this . sortBound = this . chunkcount ;
this . lastTimeWrote = System . currentTimeMillis ( ) ;
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public synchronized final void addAllUnique ( final RowCollection c ) throws RowSpaceExceededException {
2006-12-07 03:40:57 +01:00
if ( c = = null ) return ;
2011-07-14 23:42:30 +02:00
assert ( this . rowdef . objectsize = = c . rowdef . objectsize ) ;
ensureSize ( this . chunkcount + c . size ( ) ) ;
System . arraycopy ( c . chunkcache , 0 , this . chunkcache , this . rowdef . objectsize * this . chunkcount , this . rowdef . objectsize * c . size ( ) ) ;
this . chunkcount + = c . size ( ) ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2007-10-11 19:17:08 +02:00
/ * *
* This method removes the entry at position p ensuring the order of the remaining
* entries if specified by keepOrder .
* Note : Keeping the order is expensive . If you want to remove more than one element in
* a batch with this method , it ' d be better to do the removes without order keeping and doing
* the sort after all the removes are done .
2011-07-14 23:42:30 +02:00
*
2007-10-11 19:17:08 +02:00
* @param p element at this position will be removed
* @param keepOrder keep the order of remaining entries
* /
2009-01-30 16:33:00 +01:00
public synchronized final void removeRow ( final int p , final boolean keepOrder ) {
2007-04-19 15:37:02 +02:00
assert p > = 0 : " p = " + p ;
2011-07-14 23:42:30 +02:00
assert p < this . chunkcount : " p = " + p + " , chunkcount = " + this . chunkcount ;
assert this . chunkcount > 0 : " chunkcount = " + this . chunkcount ;
assert this . sortBound < = this . chunkcount : " sortBound = " + this . sortBound + " , chunkcount = " + this . chunkcount ;
if ( keepOrder & & ( p < this . sortBound ) ) {
2007-10-11 19:17:08 +02:00
// remove by shift (quite expensive for big collections)
2008-08-02 14:12:04 +02:00
final int addr = p * this . rowdef . objectsize ;
2007-10-11 19:17:08 +02:00
System . arraycopy (
2011-07-14 23:42:30 +02:00
this . chunkcache , addr + this . rowdef . objectsize ,
this . chunkcache , addr ,
( this . chunkcount - p - 1 ) * this . rowdef . objectsize ) ;
this . sortBound - - ; // this is only correct if p < sortBound, but this was already checked above
2007-03-14 09:55:05 +01:00
} else {
2007-10-11 19:17:08 +02:00
// remove by copying the top-element to the remove position
2011-07-14 23:42:30 +02:00
if ( p ! = this . chunkcount - 1 ) {
2007-10-11 19:17:08 +02:00
System . arraycopy (
2011-07-14 23:42:30 +02:00
this . chunkcache , ( this . chunkcount - 1 ) * this . rowdef . objectsize ,
this . chunkcache , p * this . rowdef . objectsize ,
2007-11-07 23:38:09 +01:00
this . rowdef . objectsize ) ;
2007-10-11 19:17:08 +02:00
}
// we moved the last element to the remove position: (p+1)st element
2007-10-22 01:26:22 +02:00
// only the first p elements keep their order (element p is already outside the order)
2011-07-14 23:42:30 +02:00
if ( this . sortBound > p ) this . sortBound = p ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
this . chunkcount - - ;
2006-06-20 16:17:21 +02:00
this . lastTimeWrote = System . currentTimeMillis ( ) ;
}
2011-07-14 23:42:30 +02:00
2011-07-16 12:08:43 +02:00
/**
 * Removes the row at position p with order keeping enabled.
 *
 * @param p position of the row to remove
 */
public final void delete(final int p) {
    final boolean keepOrder = true;
    removeRow(p, keepOrder);
}
2008-10-24 15:58:26 +02:00
/ * *
* removes the last entry from the collection
* @return
* /
2009-01-30 16:33:00 +01:00
public synchronized Row . Entry removeOne ( ) {
2011-07-14 23:42:30 +02:00
if ( this . chunkcount = = 0 ) return null ;
final Row . Entry r = get ( this . chunkcount - 1 , true ) ;
if ( this . chunkcount = = this . sortBound ) this . sortBound - - ;
this . chunkcount - - ;
2007-03-14 09:55:05 +01:00
this . lastTimeWrote = System . currentTimeMillis ( ) ;
return r ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2010-05-31 02:27:00 +02:00
public synchronized List < Row . Entry > top ( int count ) {
2011-07-14 23:42:30 +02:00
final ArrayList < Row . Entry > list = new ArrayList < Row . Entry > ( ) ;
if ( this . chunkcount = = 0 ) return list ;
2010-05-31 02:27:00 +02:00
Row . Entry entry ;
2011-07-14 23:42:30 +02:00
int cursor = this . chunkcount - 1 ;
2010-05-31 02:27:00 +02:00
while ( count > 0 & & cursor > = 0 ) {
entry = get ( cursor , true ) ;
list . add ( entry ) ;
count - - ;
cursor - - ;
}
return list ;
}
2011-07-14 23:42:30 +02:00
2010-04-19 18:42:37 +02:00
public synchronized byte [ ] smallestKey ( ) {
2011-07-14 23:42:30 +02:00
if ( this . chunkcount = = 0 ) return null ;
sort ( ) ;
2009-07-03 18:35:34 +02:00
final Row . Entry r = get ( 0 , false ) ;
2010-01-10 02:40:26 +01:00
final byte [ ] b = r . getPrimaryKeyBytes ( ) ;
2009-07-03 18:35:34 +02:00
return b ;
}
2011-07-14 23:42:30 +02:00
2010-04-19 18:42:37 +02:00
public synchronized byte [ ] largestKey ( ) {
2011-07-14 23:42:30 +02:00
if ( this . chunkcount = = 0 ) return null ;
sort ( ) ;
final Row . Entry r = get ( this . chunkcount - 1 , false ) ;
2010-01-10 02:40:26 +01:00
final byte [ ] b = r . getPrimaryKeyBytes ( ) ;
2009-07-03 18:35:34 +02:00
return b ;
}
2011-07-14 23:42:30 +02:00
2007-03-14 09:55:05 +01:00
public synchronized void clear ( ) {
2007-05-16 12:48:26 +02:00
if ( this . chunkcache . length = = 0 ) return ;
2006-06-20 16:17:21 +02:00
this . chunkcache = new byte [ 0 ] ;
this . chunkcount = 0 ;
this . sortBound = 0 ;
2006-06-21 18:05:31 +02:00
this . lastTimeWrote = System . currentTimeMillis ( ) ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2006-06-21 01:18:41 +02:00
public int size ( ) {
2009-01-02 12:38:20 +01:00
return this . chunkcount ;
}
2011-07-14 23:42:30 +02:00
2009-12-02 01:37:59 +01:00
public boolean isEmpty ( ) {
return this . chunkcount = = 0 ;
}
2011-07-14 23:42:30 +02:00
2009-01-02 12:38:20 +01:00
public int sorted ( ) {
return this . sortBound ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2010-01-10 02:40:26 +01:00
public synchronized Iterator < byte [ ] > keys ( final boolean keepOrderWhenRemoving ) {
2007-12-27 18:56:59 +01:00
// iterates byte[] - type entries
2009-01-09 01:06:36 +01:00
return new keyIterator ( keepOrderWhenRemoving ) ;
2007-12-27 18:56:59 +01:00
}
2011-07-14 23:42:30 +02:00
2007-12-27 18:56:59 +01:00
/ * *
* Iterator for kelondroRowCollection .
* It supports remove ( ) though it doesn ' t contain the order of the underlying
* collection during removes .
*
* /
2010-04-16 18:07:19 +02:00
private class keyIterator implements Iterator < byte [ ] > {
2007-12-27 18:56:59 +01:00
private int p ;
2010-01-11 00:09:48 +01:00
private final boolean keepOrderWhenRemoving ;
2011-07-14 23:42:30 +02:00
2010-04-16 18:07:19 +02:00
private keyIterator ( final boolean keepOrderWhenRemoving ) {
2009-01-09 01:06:36 +01:00
this . p = 0 ;
this . keepOrderWhenRemoving = keepOrderWhenRemoving ;
2007-12-27 18:56:59 +01:00
}
2011-07-14 23:42:30 +02:00
2007-12-27 18:56:59 +01:00
public boolean hasNext ( ) {
2011-07-14 23:42:30 +02:00
return this . p < RowCollection . this . chunkcount ;
2007-12-27 18:56:59 +01:00
}
public byte [ ] next ( ) {
2011-07-14 23:42:30 +02:00
return getKey ( this . p + + ) ;
2007-12-27 18:56:59 +01:00
}
2011-07-14 23:42:30 +02:00
2007-12-27 18:56:59 +01:00
public void remove ( ) {
2011-07-14 23:42:30 +02:00
this . p - - ;
removeRow ( this . p , this . keepOrderWhenRemoving ) ;
2007-12-27 18:56:59 +01:00
}
2011-07-14 23:42:30 +02:00
}
2009-01-02 12:38:20 +01:00
/ * *
* return an iterator for the row entries in this object
* /
2009-01-30 16:33:00 +01:00
public Iterator < Row . Entry > iterator ( ) {
2007-03-03 01:55:51 +01:00
// iterates kelondroRow.Entry - type entries
2006-07-04 16:47:27 +02:00
return new rowIterator ( ) ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2007-10-11 19:17:08 +02:00
/ * *
* Iterator for kelondroRowCollection .
2009-01-09 01:06:36 +01:00
* It supports remove ( ) and keeps the order of the underlying
2007-10-11 19:17:08 +02:00
* collection during removes .
* /
2010-04-16 18:07:19 +02:00
private class rowIterator implements Iterator < Row . Entry > {
2006-06-20 16:17:21 +02:00
2006-07-04 16:47:27 +02:00
private int p ;
2011-07-14 23:42:30 +02:00
2006-07-04 16:47:27 +02:00
public rowIterator ( ) {
2011-07-14 23:42:30 +02:00
this . p = 0 ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2006-06-20 16:17:21 +02:00
public boolean hasNext ( ) {
2011-07-14 23:42:30 +02:00
return this . p < RowCollection . this . chunkcount ;
2006-06-20 16:17:21 +02:00
}
2009-01-30 16:33:00 +01:00
public Row . Entry next ( ) {
2011-07-14 23:42:30 +02:00
return get ( this . p + + , true ) ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2006-06-20 16:17:21 +02:00
public void remove ( ) {
2011-07-14 23:42:30 +02:00
this . p - - ;
removeRow ( this . p , true ) ;
2006-06-20 16:17:21 +02:00
}
2009-01-02 12:38:20 +01:00
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2011-07-16 12:08:43 +02:00
public final void sort ( ) {
2011-07-16 21:21:39 +02:00
if ( this . sortBound = = this . chunkcount ) return ; // this is sorted
2011-07-16 21:13:30 +02:00
net . yacy . cora . storage . Array . sort ( this ) ;
2011-07-16 21:21:39 +02:00
this . sortBound = this . chunkcount ;
2007-11-09 01:51:38 +01:00
}
2011-07-14 23:42:30 +02:00
2008-06-16 01:25:57 +02:00
public static class partitionthread implements Callable < Integer > {
2009-01-30 16:33:00 +01:00
RowCollection rc ;
2008-06-16 01:25:57 +02:00
int L , R , S ;
2011-07-14 23:42:30 +02:00
2009-01-30 16:33:00 +01:00
public partitionthread ( final RowCollection rc , final int L , final int R , final int S ) {
2008-06-16 01:25:57 +02:00
this . rc = rc ;
this . L = L ;
this . R = R ;
this . S = S ;
}
2011-07-14 23:42:30 +02:00
2008-06-16 01:25:57 +02:00
public Integer call ( ) throws Exception {
2011-07-14 23:42:30 +02:00
return Integer . valueOf ( this . rc . partition ( this . L , this . R , this . S , new byte [ this . rc . rowdef . objectsize ] ) ) ;
2008-06-16 01:25:57 +02:00
}
}
2011-07-14 23:42:30 +02:00
2008-10-24 15:58:26 +02:00
/ * *
* @param L is the first element in the sequence
* @param R is the right bound of the sequence , and outside of the sequence
* @param S is the bound of the sorted elements in the sequence
* @param swapspace
* @return
* /
2008-08-02 14:12:04 +02:00
final int partition ( final int L , final int R , int S , final byte [ ] swapspace ) {
2008-03-23 00:15:28 +01:00
assert ( L < R - 1 ) : " L = " + L + " , R = " + R + " , S = " + S ;
2009-06-02 00:45:28 +02:00
assert ( R - L > = isortlimit ) : " L = " + L + " , R = " + R + " , S = " + S + " , isortlimit = " + isortlimit ;
2011-07-14 23:42:30 +02:00
2007-11-09 01:51:38 +01:00
int p = L ;
int q = R - 1 ;
2009-06-30 11:27:46 +02:00
int pivot = pivot ( L , R , S ) ;
2009-01-30 16:33:00 +01:00
if ( this . rowdef . objectOrder instanceof Base64Order ) {
2007-11-09 01:51:38 +01:00
while ( p < = q ) {
2007-11-09 16:34:11 +01:00
// wenn pivot < S: pivot befindet sich in sortierter Sequenz von L bis S - 1
// d.h. alle Werte von L bis pivot sind kleiner als das pivot
2009-04-22 00:12:19 +02:00
// zu finden ist ein minimales p <= q so dass chunk[p] >= pivot
2007-11-09 16:34:11 +01:00
if ( ( pivot < S ) & & ( p < pivot ) ) {
//System.out.println("+++ saved " + (pivot - p) + " comparisments");
p = pivot ;
S = 0 ;
} else {
2009-04-22 00:12:19 +02:00
while ( ( p < R - 1 ) & & ( compare ( pivot , p ) > = 0 ) ) p + + ; // chunkAt[p] < pivot
2007-11-09 16:34:11 +01:00
}
// nun gilt chunkAt[p] >= pivot
2009-04-22 00:12:19 +02:00
while ( ( q > L ) & & ( compare ( pivot , q ) < = 0 ) ) q - - ; // chunkAt[q] > pivot
2007-11-09 01:51:38 +01:00
if ( p < = q ) {
pivot = swap ( p , q , pivot , swapspace ) ;
p + + ;
q - - ;
}
}
} else {
while ( p < = q ) {
2007-11-09 16:34:11 +01:00
if ( ( pivot < S ) & & ( p < pivot ) ) {
p = pivot ;
S = 0 ;
} else {
2008-03-23 00:15:28 +01:00
while ( ( p < R - 1 ) & & ( compare ( pivot , p ) > = 0 ) ) p + + ; // chunkAt[p] < pivot
2007-11-09 16:34:11 +01:00
}
2008-03-23 00:15:28 +01:00
while ( ( q > L ) & & ( compare ( pivot , q ) < = 0 ) ) q - - ; // chunkAt[q] > pivot
2007-11-09 01:51:38 +01:00
if ( p < = q ) {
pivot = swap ( p , q , pivot , swapspace ) ;
p + + ;
q - - ;
}
}
2006-06-20 16:17:21 +02:00
}
2008-03-23 00:15:28 +01:00
// now p is the beginning of the upper sequence
// finally, the pivot element should be exactly between the two sequences
// distinguish two cases: pivot in lower and upper sequence
// to do this it is sufficient to compare the index, not the entry content
if ( pivot < p ) {
// switch the pivot with the element _below_ p, the element in p belongs to the upper sequence
// and does not fit into the lower sequence
swap ( pivot , p - 1 , pivot , swapspace ) ;
return p - 1 ;
} else if ( pivot > p ) {
// switch the pivot with p, they are both in the same sequence
swap ( pivot , p , pivot , swapspace ) ;
return p ;
}
assert pivot = = p ;
2007-11-09 01:51:38 +01:00
return p ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2009-06-30 11:27:46 +02:00
private final int pivot ( final int L , final int R , final int S ) {
2010-08-03 04:42:28 +02:00
if ( S = = 0 | | S < L ) {
2008-03-20 23:01:12 +01:00
// the collection has no ordering
// or
// the collection has an ordering, but this is not relevant for this pivot
// because the ordered zone is outside of ordering zone
2010-08-03 04:42:28 +02:00
final int m = picMiddle ( L , ( 3 * L + R - 1 ) / 4 , ( L + R - 1 ) / 2 , ( L + 3 * R - 3 ) / 4 , R - 1 ) ;
2008-03-20 23:01:12 +01:00
assert L < = m ;
assert m < R ;
return m ;
}
if ( S < R ) {
// the collection has an ordering
// and part of the ordered zone is inside the to-be-ordered zone
2010-08-03 04:42:28 +02:00
final int m = picMiddle ( L , L + ( S - L ) / 3 , ( L + R - 1 ) / 2 , S , R - 1 ) ;
2008-03-20 23:01:12 +01:00
assert L < = m ;
assert m < R ;
return m ;
}
// use the sorted set to find good pivot:
// the sort range is fully inside the sorted area:
// the middle element must be the best
2008-03-21 00:11:04 +01:00
// (however, it should be skipped because there is no point in sorting this)
2008-03-20 23:01:12 +01:00
return ( L + R - 1 ) / 2 ;
}
2010-08-03 04:42:28 +02:00
/**
 * Picks a middle candidate out of five positions by applying the
 * three-position pick twice: first on (a, b, c), then on that result and (d, e).
 *
 * @return one of the given indexes
 */
private final int picMiddle(final int a, final int b, final int c, final int d, final int e) {
    final int firstPick = picMiddle(a, b, c);
    return picMiddle(firstPick, d, e);
}
2011-07-14 23:42:30 +02:00
2010-08-03 04:42:28 +02:00
private final int picMiddle ( final int a , final int b , final int c ) {
2010-08-04 15:33:12 +02:00
if ( compare ( a , b ) > 0 ) {
if ( compare ( c , a ) > 0 ) return a ;
if ( compare ( b , c ) > 0 ) return b ;
2010-08-03 04:42:28 +02:00
} else {
2010-08-04 15:33:12 +02:00
if ( compare ( a , c ) > 0 ) return a ;
if ( compare ( c , b ) > 0 ) return b ;
2010-08-03 04:42:28 +02:00
}
2010-08-04 15:33:12 +02:00
return c ;
2010-08-03 04:42:28 +02:00
//if (c < a && a < b || a > b && c > a) return a;
//if (a < b && c > b || c < b && a > b) return b;
}
2011-07-14 23:42:30 +02:00
2008-08-02 14:12:04 +02:00
private final int swap ( final int i , final int j , final int p , final byte [ ] swapspace ) {
2006-06-20 16:17:21 +02:00
if ( i = = j ) return p ;
2011-07-14 23:42:30 +02:00
System . arraycopy ( this . chunkcache , this . rowdef . objectsize * i , swapspace , 0 , this . rowdef . objectsize ) ;
System . arraycopy ( this . chunkcache , this . rowdef . objectsize * j , this . chunkcache , this . rowdef . objectsize * i , this . rowdef . objectsize ) ;
System . arraycopy ( swapspace , 0 , this . chunkcache , this . rowdef . objectsize * j , this . rowdef . objectsize ) ;
2006-06-20 16:17:21 +02:00
if ( i = = p ) return j ; else if ( j = = p ) return i ; else return p ;
}
2010-04-16 18:07:19 +02:00
protected synchronized void uniq ( ) {
2011-07-16 12:08:43 +02:00
Array . uniq ( this ) ;
2006-06-20 16:17:21 +02:00
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
public synchronized ArrayList < RowCollection > removeDoubles ( ) throws RowSpaceExceededException {
2008-01-20 02:22:46 +01:00
assert ( this . rowdef . objectOrder ! = null ) ;
// removes double-occurrences of chunks
2008-05-02 00:40:42 +02:00
// in contrast to uniq() this removes also the remaining, non-double entry that had a double-occurrence to the others
2008-01-20 02:22:46 +01:00
// all removed chunks are returned in an array
2011-07-14 23:42:30 +02:00
sort ( ) ;
2009-01-30 16:33:00 +01:00
final ArrayList < RowCollection > report = new ArrayList < RowCollection > ( ) ;
2011-07-14 23:42:30 +02:00
if ( this . chunkcount < 2 ) return report ;
int i = this . chunkcount - 2 ;
2008-01-20 02:22:46 +01:00
boolean u = true ;
2009-01-30 16:33:00 +01:00
RowCollection collection = new RowCollection ( this . rowdef , 2 ) ;
2008-01-20 02:22:46 +01:00
try {
while ( i > = 0 ) {
2009-03-13 10:30:19 +01:00
if ( match ( i , i + 1 ) ) {
2008-04-24 15:31:55 +02:00
collection . addUnique ( get ( i + 1 , false ) ) ;
2008-01-20 02:22:46 +01:00
removeRow ( i + 1 , false ) ;
2011-07-14 23:42:30 +02:00
if ( i + 1 < this . chunkcount - 1 ) u = false ;
2009-12-02 01:37:59 +01:00
} else if ( ! collection . isEmpty ( ) ) {
2008-01-20 02:22:46 +01:00
// finish collection of double occurrences
2008-04-24 15:31:55 +02:00
collection . addUnique ( get ( i + 1 , false ) ) ;
2008-01-20 02:22:46 +01:00
removeRow ( i + 1 , false ) ;
2011-07-14 23:42:30 +02:00
if ( i + 1 < this . chunkcount - 1 ) u = false ;
2010-04-28 00:22:16 +02:00
collection . trim ( ) ;
2008-01-20 02:22:46 +01:00
report . add ( collection ) ;
2009-01-30 16:33:00 +01:00
collection = new RowSet ( this . rowdef , 2 ) ;
2008-01-20 02:22:46 +01:00
}
i - - ;
}
2008-08-02 14:12:04 +02:00
} catch ( final RuntimeException e ) {
2009-01-31 00:33:47 +01:00
Log . logWarning ( " kelondroRowCollection " , e . getMessage ( ) , e ) ;
2008-01-20 02:22:46 +01:00
} finally {
2011-07-14 23:42:30 +02:00
if ( ! u ) sort ( ) ;
2008-01-20 02:22:46 +01:00
}
return report ;
}
2011-07-14 23:42:30 +02:00
2007-11-09 01:51:38 +01:00
public synchronized boolean isSorted ( ) {
assert ( this . rowdef . objectOrder ! = null ) ;
2011-07-14 23:42:30 +02:00
if ( this . chunkcount < = 1 ) return true ;
if ( this . chunkcount ! = this . sortBound ) return false ;
2009-01-09 01:06:36 +01:00
/ *
2007-11-09 01:51:38 +01:00
for ( int i = 0 ; i < chunkcount - 1 ; i + + ) {
2011-03-07 21:36:40 +01:00
//System.out.println("*" + UTF8.String(get(i).getPrimaryKeyBytes()));
2007-11-09 01:51:38 +01:00
if ( compare ( i , i + 1 ) > 0 ) {
2011-03-07 21:36:40 +01:00
System . out . println ( " ? " + UTF8 . String ( get ( i + 1 , false ) . getPrimaryKeyBytes ( ) ) ) ;
2007-11-09 01:51:38 +01:00
return false ;
}
}
2009-01-09 01:06:36 +01:00
* /
2007-11-09 01:51:38 +01:00
return true ;
}
2011-07-14 23:42:30 +02:00
2007-04-17 17:15:47 +02:00
public synchronized String toString ( ) {
2011-03-09 10:29:05 +01:00
final StringBuilder s = new StringBuilder ( 80 ) ;
2009-01-30 16:33:00 +01:00
final Iterator < Row . Entry > i = iterator ( ) ;
2007-12-27 18:56:59 +01:00
if ( i . hasNext ( ) ) s . append ( i . next ( ) . toString ( ) ) ;
2008-06-06 18:01:27 +02:00
while ( i . hasNext ( ) ) s . append ( " , " + ( i . next ( ) ) . toString ( ) ) ;
2011-03-07 21:36:40 +01:00
return s . toString ( ) ;
2006-06-20 16:17:21 +02:00
}
2008-08-02 14:12:04 +02:00
private final int compare ( final int i , final int j ) {
2011-07-14 23:42:30 +02:00
assert ( this . chunkcount * this . rowdef . objectsize < = this . chunkcache . length ) : " chunkcount = " + this . chunkcount + " , objsize = " + this . rowdef . objectsize + " , chunkcache.length = " + this . chunkcache . length ;
assert ( i > = 0 ) & & ( i < this . chunkcount ) : " i = " + i + " , chunkcount = " + this . chunkcount ;
assert ( j > = 0 ) & & ( j < this . chunkcount ) : " j = " + j + " , chunkcount = " + this . chunkcount ;
2006-12-06 04:02:57 +01:00
assert ( this . rowdef . objectOrder ! = null ) ;
2006-06-20 16:17:21 +02:00
if ( i = = j ) return 0 ;
2008-05-13 17:28:55 +02:00
//assert (!bugappearance(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength));
//assert (!bugappearance(chunkcache, j * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength));
2008-08-02 14:12:04 +02:00
final int c = this . rowdef . objectOrder . compare (
2011-07-14 23:42:30 +02:00
this . chunkcache ,
2009-03-13 17:52:31 +01:00
i * this . rowdef . objectsize ,
2011-07-14 23:42:30 +02:00
this . chunkcache ,
2009-03-13 17:52:31 +01:00
j * this . rowdef . objectsize ,
2007-11-07 23:38:09 +01:00
this . rowdef . primaryKeyLength ) ;
2006-06-20 16:17:21 +02:00
return c ;
}
2006-06-22 17:42:09 +02:00
2010-04-28 10:38:57 +02:00
protected synchronized int compare ( final byte [ ] a , final int astart , final int chunknumber ) {
2011-07-14 23:42:30 +02:00
assert ( chunknumber < this . chunkcount ) ;
2010-04-28 10:38:57 +02:00
assert a . length - astart > = this . rowdef . primaryKeyLength ;
final int len = Math . min ( a . length - astart , this . rowdef . primaryKeyLength ) ;
2011-07-14 23:42:30 +02:00
return this . rowdef . objectOrder . compare ( a , astart , this . chunkcache , chunknumber * this . rowdef . objectsize , len ) ;
2007-03-14 09:55:05 +01:00
}
2011-07-14 23:42:30 +02:00
2009-03-13 10:30:19 +01:00
protected final boolean match ( final int i , final int j ) {
2011-07-14 23:42:30 +02:00
assert ( this . chunkcount * this . rowdef . objectsize < = this . chunkcache . length ) : " chunkcount = " + this . chunkcount + " , objsize = " + this . rowdef . objectsize + " , chunkcache.length = " + this . chunkcache . length ;
assert ( i > = 0 ) & & ( i < this . chunkcount ) : " i = " + i + " , chunkcount = " + this . chunkcount ;
assert ( j > = 0 ) & & ( j < this . chunkcount ) : " j = " + j + " , chunkcount = " + this . chunkcount ;
if ( i > = this . chunkcount ) return false ;
if ( j > = this . chunkcount ) return false ;
2009-03-13 10:30:19 +01:00
assert ( this . rowdef . objectOrder ! = null ) ;
if ( i = = j ) return true ;
2009-03-13 17:52:31 +01:00
int astart = i * this . rowdef . objectsize ;
int bstart = j * this . rowdef . objectsize ;
2009-03-13 10:30:19 +01:00
int k = this . rowdef . primaryKeyLength ;
while ( k - - ! = 0 ) {
2011-07-14 23:42:30 +02:00
if ( this . chunkcache [ astart + + ] ! = this . chunkcache [ bstart + + ] ) return false ;
2009-03-13 10:30:19 +01:00
}
return true ;
}
2011-07-14 23:42:30 +02:00
2010-04-28 10:38:57 +02:00
protected synchronized boolean match ( final byte [ ] a , int astart , final int chunknumber ) {
2011-07-14 23:42:30 +02:00
if ( chunknumber > = this . chunkcount ) return false ;
2009-03-13 17:52:31 +01:00
int p = chunknumber * this . rowdef . objectsize ;
2010-04-28 10:38:57 +02:00
assert a . length - astart > = this . rowdef . primaryKeyLength ;
int len = Math . min ( a . length - astart , this . rowdef . primaryKeyLength ) ;
2009-03-13 10:30:19 +01:00
while ( len - - ! = 0 ) {
2011-07-14 23:42:30 +02:00
if ( a [ astart + + ] ! = this . chunkcache [ p + + ] ) return false ;
2009-03-13 10:30:19 +01:00
}
return true ;
2007-03-14 09:55:05 +01:00
}
2008-06-15 01:17:56 +02:00
2007-03-14 09:55:05 +01:00
public synchronized void close ( ) {
2011-07-14 23:42:30 +02:00
this . chunkcache = null ;
2007-03-14 09:55:05 +01:00
}
2011-07-14 23:42:30 +02:00
2008-08-02 14:12:04 +02:00
private static long d ( final long a , final long b ) {
2008-08-02 15:57:00 +02:00
if ( b = = 0 ) return a ;
return a / b ;
2007-11-10 09:57:00 +01:00
}
2011-07-14 23:42:30 +02:00
2008-05-03 11:06:00 +02:00
private static Random random = null ;
2007-11-11 01:28:22 +01:00
private static String randomHash ( ) {
return
2011-02-25 01:56:31 +01:00
Base64Order . enhancedCoder . encodeLongSB ( random . nextLong ( ) , 4 ) . toString ( ) +
Base64Order . enhancedCoder . encodeLongSB ( random . nextLong ( ) , 4 ) . toString ( ) +
Base64Order . enhancedCoder . encodeLongSB ( random . nextLong ( ) , 4 ) . toString ( ) ;
2007-11-11 01:28:22 +01:00
}
2011-07-14 23:42:30 +02:00
2009-12-10 00:27:26 +01:00
/**
 * Self-test and micro-benchmark that prints its findings to System.out.
 * Steps, in this order:
 * - checks that Base64Order.enhancedCoder.compare(a, b) and compare(b, a) agree on random hashes
 * - runs removeDoubles() on five fixed keys and prints the removed groups and the remaining rows
 * - feeds collection a twice with the same random hashes, sorts it, applies uniq() and checks the order
 * - measures creation, copy, sort and uniq for the collections c and d and the RowSet e
 * - looks up hashes in e that were inserted (all must be found) and hashes that were not (none may be found)
 *
 * The static field random is re-seeded with 0 several times; the results depend
 * on the exact sequence of these re-seedings and of the randomHash() calls.
 *
 * @param testsize number of random entries used for the collections
 * @throws RowSpaceExceededException declared for the collection operations used here
 */
public static void test ( final int testsize ) throws RowSpaceExceededException {
2009-01-30 16:33:00 +01:00
// row definition with a single column and Base64Order.enhancedCoder as order
// NOTE(review): the 12 is presumably the column width in bytes - confirm against Column
final Row r = new Row ( new Column [ ] {
2009-01-30 23:44:20 +01:00
new Column ( " hash " , Column . celltype_string , Column . encoder_bytes , 12 , " hash " ) } ,
2009-03-13 17:52:31 +01:00
Base64Order . enhancedCoder ) ;
2011-07-14 23:42:30 +02:00
2011-07-16 12:08:43 +02:00
// test compare method
// the results for (a, b) and (b, a) must be consistent: 0/0, -1/1 or 1/-1
random = new Random ( 0 ) ;
for ( int i = 0 ; i < testsize ; i + + ) {
final byte [ ] a = ASCII . getBytes ( randomHash ( ) ) ;
final byte [ ] b = ASCII . getBytes ( randomHash ( ) ) ;
final int c = Base64Order . enhancedCoder . compare ( a , b ) ;
if ( c = = 0 & & Base64Order . enhancedCoder . compare ( b , a ) ! = 0 )
System . out . println ( " compare failed / =; a = " + ASCII . String ( a ) + " , b = " + ASCII . String ( b ) ) ;
if ( c = = - 1 & & Base64Order . enhancedCoder . compare ( b , a ) ! = 1 )
System . out . println ( " compare failed / =; a < " + ASCII . String ( a ) + " , b = " + ASCII . String ( b ) ) ;
if ( c = = 1 & & Base64Order . enhancedCoder . compare ( b , a ) ! = - 1 )
System . out . println ( " compare failed / =; a > " + ASCII . String ( a ) + " , b = " + ASCII . String ( b ) ) ;
}
// test sorting methods
// five fixed keys, one of them three times: input for removeDoubles()
2009-01-30 16:33:00 +01:00
RowCollection a = new RowCollection ( r , testsize ) ;
2008-01-20 02:22:46 +01:00
a . add ( " AAAAAAAAAAAA " . getBytes ( ) ) ;
a . add ( " BBBBBBBBBBBB " . getBytes ( ) ) ;
a . add ( " BBBBBBBBBBBB " . getBytes ( ) ) ;
a . add ( " BBBBBBBBBBBB " . getBytes ( ) ) ;
a . add ( " CCCCCCCCCCCC " . getBytes ( ) ) ;
2009-01-30 16:33:00 +01:00
final ArrayList < RowCollection > del = a . removeDoubles ( ) ;
2008-01-20 02:22:46 +01:00
// print the removed groups, then the rows that are left in a
System . out . println ( del + " rows double " ) ;
2009-01-30 16:33:00 +01:00
final Iterator < Row . Entry > j = a . iterator ( ) ;
2011-03-07 21:36:40 +01:00
while ( j . hasNext ( ) ) System . out . println ( UTF8 . String ( j . next ( ) . bytes ( ) ) ) ;
2011-07-14 23:42:30 +02:00
2007-12-20 03:46:41 +01:00
System . out . println ( " kelondroRowCollection test with size = " + testsize ) ;
2009-01-30 16:33:00 +01:00
a = new RowCollection ( r , testsize ) ;
2008-06-16 01:25:57 +02:00
long t0 = System . nanoTime ( ) ;
2007-12-20 03:46:41 +01:00
// both feeds start with seed 0, so the second feed adds the same hashes as the first one again
random = new Random ( 0 ) ;
2011-07-16 12:08:43 +02:00
for ( int i = 0 ; i < testsize / 2 ; i + + ) a . add ( randomHash ( ) . getBytes ( ) ) ;
//System.out.println("check: after first random feed"); for (final Row.Entry w: a) System.out.println("1 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
2007-12-20 03:46:41 +01:00
random = new Random ( 0 ) ;
2011-07-16 12:08:43 +02:00
for ( int i = 0 ; i < testsize / 2 ; i + + ) a . add ( randomHash ( ) . getBytes ( ) ) ;
//System.out.println("check: after second random feed"); for (final Row.Entry w: a) System.out.println("2 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
2007-12-20 03:46:41 +01:00
a . sort ( ) ;
2011-07-16 12:08:43 +02:00
//System.out.println("check: after sort"); for (final Row.Entry w: a) System.out.println("3 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
2007-12-20 03:46:41 +01:00
a . uniq ( ) ;
2011-07-16 12:08:43 +02:00
//System.out.println("check: after sort uniq"); for (final Row.Entry w: a) System.out.println("4 check-row " + ASCII.String(w.getPrimaryKeyBytes()));
// check the order of the elements: after sort and uniq each entry must be strictly smaller than its successor
for ( int i = 0 ; i < a . size ( ) - 1 ; i + + ) {
if ( a . get ( i , false ) . compareTo ( a . get ( i + 1 , false ) ) > = 0 ) System . out . println ( " Compare error at pos " + i + " : a.get(i)= " + a . get ( i , false ) + " , a.get(i + 1)= " + a . get ( i + 1 , false ) ) ;
}
2008-03-21 00:11:04 +01:00
long t1 = System . nanoTime ( ) ;
// NOTE(review): d(testsize, nanoseconds) is an integer division of the entry count by
// the elapsed nanoseconds; it yields 0 whenever the elapsed time exceeds testsize
System . out . println ( " create a : " + ( t1 - t0 ) + " nanoseconds, " + d ( testsize , ( t1 - t0 ) ) + " entries/nanoseconds; a.size() = " + a . size ( ) ) ;
2011-07-14 23:42:30 +02:00
2009-01-30 16:33:00 +01:00
// c: testsize random hashes, generated with seed 0
final RowCollection c = new RowCollection ( r , testsize ) ;
2007-11-11 01:28:22 +01:00
random = new Random ( 0 ) ;
2008-03-21 00:11:04 +01:00
t0 = System . nanoTime ( ) ;
2007-11-09 01:51:38 +01:00
for ( int i = 0 ; i < testsize ; i + + ) {
2007-11-11 01:28:22 +01:00
c . add ( randomHash ( ) . getBytes ( ) ) ;
2007-11-09 01:51:38 +01:00
}
2008-03-21 00:11:04 +01:00
t1 = System . nanoTime ( ) ;
System . out . println ( " create c : " + ( t1 - t0 ) + " nanoseconds, " + d ( testsize , ( t1 - t0 ) ) + " entries/nanoseconds " ) ;
2009-01-30 16:33:00 +01:00
// d: a copy of the keys of c; the local variable d does not hide the static method d(long, long)
final RowCollection d = new RowCollection ( r , testsize ) ;
2007-11-09 01:51:38 +01:00
for ( int i = 0 ; i < testsize ; i + + ) {
2011-02-25 13:41:27 +01:00
d . add ( c . get ( i , false ) . getPrimaryKeyBytes ( ) ) ;
2007-11-09 01:51:38 +01:00
}
2008-08-02 14:12:04 +02:00
final long t2 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " copy c -> d: " + ( t2 - t1 ) + " nanoseconds, " + d ( testsize , ( t2 - t1 ) ) + " entries/nanoseconds " ) ;
2010-04-16 18:07:19 +02:00
//availableCPU = 1;
2007-11-09 01:51:38 +01:00
c . sort ( ) ;
2008-08-02 14:12:04 +02:00
final long t3 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " sort c (1) : " + ( t3 - t2 ) + " nanoseconds, " + d ( testsize , ( t3 - t2 ) ) + " entries/nanoseconds " ) ;
2010-04-16 18:07:19 +02:00
//availableCPU = 2;
2007-11-09 01:51:38 +01:00
d . sort ( ) ;
2008-08-02 14:12:04 +02:00
final long t4 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " sort d (2) : " + ( t4 - t3 ) + " nanoseconds, " + d ( testsize , ( t4 - t3 ) ) + " entries/nanoseconds " ) ;
2007-11-09 01:51:38 +01:00
c . uniq ( ) ;
2008-08-02 14:12:04 +02:00
final long t5 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " uniq c : " + ( t5 - t4 ) + " nanoseconds, " + d ( testsize , ( t5 - t4 ) ) + " entries/nanoseconds " ) ;
2007-11-09 01:51:38 +01:00
d . uniq ( ) ;
2008-08-02 14:12:04 +02:00
final long t6 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " uniq d : " + ( t6 - t5 ) + " nanoseconds, " + d ( testsize , ( t6 - t5 ) ) + " entries/nanoseconds " ) ;
2007-11-11 01:28:22 +01:00
// e: a RowSet filled through put() with testsize random hashes, generated with seed 0
random = new Random ( 0 ) ;
2009-01-30 16:33:00 +01:00
final RowSet e = new RowSet ( r , testsize ) ;
2008-06-15 00:51:47 +02:00
for ( int i = 0 ; i < testsize ; i + + ) {
e . put ( r . newEntry ( randomHash ( ) . getBytes ( ) ) ) ;
}
2008-08-02 14:12:04 +02:00
final long t7 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " create e : " + ( t7 - t6 ) + " nanoseconds, " + d ( testsize , ( t7 - t6 ) ) + " entries/nanoseconds " ) ;
2007-11-09 16:34:11 +01:00
e . sort ( ) ;
2008-08-02 14:12:04 +02:00
final long t8 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " sort e (2) : " + ( t8 - t7 ) + " nanoseconds, " + d ( testsize , ( t8 - t7 ) ) + " entries/nanoseconds " ) ;
2007-11-09 16:34:11 +01:00
e . uniq ( ) ;
2008-08-02 14:12:04 +02:00
final long t9 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " uniq e : " + ( t9 - t8 ) + " nanoseconds, " + d ( testsize , ( t9 - t8 ) ) + " entries/nanoseconds " ) ;
2008-08-02 14:12:04 +02:00
final boolean cis = c . isSorted ( ) ;
final long t10 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " c isSorted = " + ( ( cis ) ? " true " : " false " ) + " : " + ( t10 - t9 ) + " nanoseconds " ) ;
2008-08-02 14:12:04 +02:00
final boolean dis = d . isSorted ( ) ;
final long t11 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " d isSorted = " + ( ( dis ) ? " true " : " false " ) + " : " + ( t11 - t10 ) + " nanoseconds " ) ;
2008-08-02 14:12:04 +02:00
final boolean eis = e . isSorted ( ) ;
final long t12 = System . nanoTime ( ) ;
2008-03-21 00:11:04 +01:00
System . out . println ( " e isSorted = " + ( ( eis ) ? " true " : " false " ) + " : " + ( t12 - t11 ) + " nanoseconds " ) ;
2007-11-12 02:14:51 +01:00
// re-seed with 0: the next testsize hashes are the ones that were put into e, so each lookup must succeed
random = new Random ( 0 ) ;
boolean allfound = true ;
for ( int i = 0 ; i < testsize ; i + + ) {
2008-08-02 14:12:04 +02:00
final String rh = randomHash ( ) ;
2011-07-15 10:38:10 +02:00
if ( e . get ( rh . getBytes ( ) , true ) = = null ) {
2007-11-12 02:14:51 +01:00
allfound = false ;
2008-06-15 00:51:47 +02:00
System . out . println ( " not found hash " + rh + " at attempt " + i ) ;
2007-11-12 02:14:51 +01:00
break ;
}
}
2008-08-02 14:12:04 +02:00
final long t13 = System . nanoTime ( ) ;
2008-06-15 00:51:47 +02:00
System . out . println ( " e allfound = " + ( ( allfound ) ? " true " : " false " ) + " : " + ( t13 - t12 ) + " nanoseconds " ) ;
2007-11-12 02:14:51 +01:00
// no re-seed here: these hashes continue the random sequence and were not put into e,
// so a successful lookup means that e reports an entry it should not have
boolean noghosts = true ;
for ( int i = 0 ; i < testsize ; i + + ) {
2011-07-15 10:38:10 +02:00
if ( e . get ( randomHash ( ) . getBytes ( ) , true ) ! = null ) {
2007-11-12 02:14:51 +01:00
noghosts = false ;
break ;
}
}
2008-08-02 14:12:04 +02:00
final long t14 = System . nanoTime ( ) ;
2008-06-15 00:51:47 +02:00
System . out . println ( " e noghosts = " + ( ( noghosts ) ? " true " : " false " ) + " : " + ( t14 - t13 ) + " nanoseconds " ) ;
2007-11-12 02:14:51 +01:00
System . out . println ( " Result size: c = " + c . size ( ) + " , d = " + d . size ( ) + " , e = " + e . size ( ) ) ;
2007-11-09 01:51:38 +01:00
System . out . println ( ) ;
}
2011-07-14 23:42:30 +02:00
2008-08-02 14:12:04 +02:00
public static void main ( final String [ ] args ) {
2009-12-10 00:27:26 +01:00
try {
2011-07-16 12:08:43 +02:00
test ( 500000 ) ;
//test(1000);
//test(50000);
//test(100000);
//test(1000000);
Log . shutdown ( ) ;
Array . terminate ( ) ;
2011-07-14 23:42:30 +02:00
} catch ( final RowSpaceExceededException e ) {
2009-12-10 00:27:26 +01:00
e . printStackTrace ( ) ;
}
2006-08-05 01:04:03 +02:00
}
2011-07-16 12:08:43 +02:00
2008-04-19 09:54:44 +02:00
}
2011-07-16 12:08:43 +02:00
/ *
new sort
[ { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } ] rows double
AAAAAAAAAAAA
CCCCCCCCCCCC
kelondroRowCollection test with size = 50000
create a : 550687000 nanoseconds , 0 entries / nanoseconds ; a . size ( ) = 25000
create c : 31556000 nanoseconds , 0 entries / nanoseconds
copy c - > d : 13798000 nanoseconds , 0 entries / nanoseconds
sort c ( 1 ) : 80845000 nanoseconds , 0 entries / nanoseconds
sort d ( 2 ) : 79981000 nanoseconds , 0 entries / nanoseconds
uniq c : 3697000 nanoseconds , 0 entries / nanoseconds
uniq d : 3649000 nanoseconds , 0 entries / nanoseconds
create e : 5719968000 nanoseconds , 0 entries / nanoseconds
sort e ( 2 ) : 65563000 nanoseconds , 0 entries / nanoseconds
uniq e : 3540000 nanoseconds , 0 entries / nanoseconds
c isSorted = true : 119000 nanoseconds
d isSorted = true : 90000 nanoseconds
e isSorted = true : 94000 nanoseconds
e allfound = true : 64049000 nanoseconds
e noghosts = true : 57150000 nanoseconds
Result size : c = 50000 , d = 50000 , e = 50000
old sort with concurrency
[ { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } ] rows double
AAAAAAAAAAAA
CCCCCCCCCCCC
kelondroRowCollection test with size = 50000
Compare error at pos 23548 : a . get ( i ) = { hash = 8dV7ACC_D1ir } , a . get ( i + 1 ) = { hash = 8Ypevst5u_tV }
create a : 507683000 nanoseconds , 0 entries / nanoseconds ; a . size ( ) = 25001
create c : 38420000 nanoseconds , 0 entries / nanoseconds
copy c - > d : 12995000 nanoseconds , 0 entries / nanoseconds
sort c ( 1 ) : 20805000 nanoseconds , 0 entries / nanoseconds
sort d ( 2 ) : 18935000 nanoseconds , 0 entries / nanoseconds
uniq c : 3712000 nanoseconds , 0 entries / nanoseconds
uniq d : 3604000 nanoseconds , 0 entries / nanoseconds
create e : 1333761000 nanoseconds , 0 entries / nanoseconds
sort e ( 2 ) : 16124000 nanoseconds , 0 entries / nanoseconds
uniq e : 3453000 nanoseconds , 0 entries / nanoseconds
c isSorted = true : 115000 nanoseconds
d isSorted = true : 89000 nanoseconds
e isSorted = true : 94000 nanoseconds
e allfound = true : 58685000 nanoseconds
e noghosts = true : 59132000 nanoseconds
Result size : c = 50000 , d = 50000 , e = 50000
old sort without concurrency
[ { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } , { hash = BBBBBBBBBBBB } ] rows double
AAAAAAAAAAAA
CCCCCCCCCCCC
kelondroRowCollection test with size = 50000
Compare error at pos 23548 : a . get ( i ) = { hash = 8dV7ACC_D1ir } , a . get ( i + 1 ) = { hash = 8Ypevst5u_tV }
create a : 502494000 nanoseconds , 0 entries / nanoseconds ; a . size ( ) = 25001
create c : 36062000 nanoseconds , 0 entries / nanoseconds
copy c - > d : 16164000 nanoseconds , 0 entries / nanoseconds
sort c ( 1 ) : 32442000 nanoseconds , 0 entries / nanoseconds
sort d ( 2 ) : 32025000 nanoseconds , 0 entries / nanoseconds
uniq c : 3581000 nanoseconds , 0 entries / nanoseconds
uniq d : 3561000 nanoseconds , 0 entries / nanoseconds
create e : 1788591000 nanoseconds , 0 entries / nanoseconds
sort e ( 2 ) : 22318000 nanoseconds , 0 entries / nanoseconds
uniq e : 3438000 nanoseconds , 0 entries / nanoseconds
c isSorted = true : 113000 nanoseconds
d isSorted = true : 89000 nanoseconds
e isSorted = true : 94000 nanoseconds
e allfound = true : 64161000 nanoseconds
e noghosts = true : 55975000 nanoseconds
Result size : c = 50000 , d = 50000 , e = 50000
* /