2008-08-19 16:10:40 +02:00
// kelondroBLOBArray.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 19.08.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-01-30 23:08:08 +01:00
package de.anomic.kelondro.blob ;
2008-08-19 16:10:40 +02:00
import java.io.File ;
import java.io.IOException ;
import java.text.ParseException ;
import java.util.ArrayList ;
import java.util.Date ;
2009-04-01 14:39:11 +02:00
import java.util.HashSet ;
2008-08-19 16:10:40 +02:00
import java.util.Iterator ;
import java.util.List ;
import java.util.TreeMap ;
import java.util.concurrent.CopyOnWriteArrayList ;
2009-04-02 15:26:47 +02:00
import de.anomic.kelondro.index.Row ;
2009-01-30 23:08:08 +01:00
import de.anomic.kelondro.order.ByteOrder ;
import de.anomic.kelondro.order.CloneableIterator ;
import de.anomic.kelondro.order.NaturalOrder ;
2009-01-30 23:44:20 +01:00
import de.anomic.kelondro.order.MergeIterator ;
2009-04-15 08:34:27 +02:00
import de.anomic.kelondro.text.Reference ;
2009-04-02 15:26:47 +02:00
import de.anomic.kelondro.text.ReferenceContainer ;
2009-04-15 08:34:27 +02:00
import de.anomic.kelondro.text.ReferenceFactory ;
2009-04-02 15:26:47 +02:00
import de.anomic.kelondro.text.ReferenceContainerCache.blobFileEntries ;
2009-03-06 16:53:20 +01:00
import de.anomic.kelondro.util.DateFormatter ;
2009-03-30 17:31:25 +02:00
import de.anomic.kelondro.util.FileUtils ;
2009-04-02 15:26:47 +02:00
import de.anomic.kelondro.util.Log ;
2009-01-30 16:33:00 +01:00
2009-01-30 23:08:08 +01:00
public class BLOBArray implements BLOB {
2008-08-19 16:10:40 +02:00
/*
 * This class implements a BLOB using a set of kelondroBLOBHeap objects.
 * In addition to a kelondroBLOBHeap this BLOB can delete large amounts of data using a given time limit.
 * This is realized by creating separate BLOB files. New files are created when either
 * - a given time limit is reached
 * - a given space limit is reached
 * To organize such an array of BLOB files, the following file name structure is used:
 * <BLOB-Name>/<YYYYMMDDhhmm>.blob
 * That means all BLOB files are inside a directory that has the name of the BLOBArray.
 * To delete content that is out-dated, one special method is implemented that deletes content by a given
 * time-out. Deletions are not made automatically, they must be triggered using this method.
 */
2008-10-16 23:24:09 +02:00
public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L ;
2008-08-19 16:10:40 +02:00
private int keylength ;
2009-01-30 16:33:00 +01:00
private ByteOrder ordering ;
2008-08-19 16:10:40 +02:00
private File heapLocation ;
2008-10-16 23:24:09 +02:00
private long fileAgeLimit ;
private long fileSizeLimit ;
private long repositoryAgeMax ;
private long repositorySizeMax ;
2008-08-19 16:10:40 +02:00
private List < blobItem > blobs ;
2009-04-02 17:08:56 +02:00
private String prefix ;
2008-12-10 12:15:19 +01:00
private int buffersize ;
2008-08-19 16:10:40 +02:00
2009-01-30 23:08:08 +01:00
/**
 * open (or create) a BLOB array in the given directory.
 * All files named &lt;prefix&gt;.&lt;YYYYMMDDhhmmssSSS&gt;.blob inside the directory are mounted;
 * stale temporary and orphaned index files are removed first, and old-style file names
 * are migrated to the current naming scheme.
 * @param heapLocation the directory that holds the blob files
 * @param prefix file name prefix of all blob files in this array
 * @param keylength length of the primary key in bytes
 * @param ordering byte order of the keys
 * @param buffersize write buffer size for the newest (writable) blob file
 * @throws IOException if the directory cannot be used
 */
public BLOBArray(
        final File heapLocation,
        final String prefix,
        final int keylength,
        final ByteOrder ordering,
        final int buffersize) throws IOException {
    this.keylength = keylength;
    this.prefix = prefix;
    this.ordering = ordering;
    this.buffersize = buffersize;
    this.heapLocation = heapLocation;
    this.fileAgeLimit = oneMonth;
    this.fileSizeLimit = (long) Integer.MAX_VALUE;
    this.repositoryAgeMax = Long.MAX_VALUE;
    this.repositorySizeMax = Long.MAX_VALUE;

    // check existence of the heap directory
    if (heapLocation.exists()) {
        if (!heapLocation.isDirectory()) throw new IOException("the BLOBArray directory " + heapLocation.toString() + " does not exist (is blocked by a file with same name)");
    } else {
        if (!heapLocation.mkdirs()) throw new IOException("the BLOBArray directory " + heapLocation.toString() + " does not exist (can not be created)");
    }

    // register all blob files inside this directory
    String[] files = heapLocation.list();
    final HashSet<String> fh = new HashSet<String>();
    for (int i = 0; i < files.length; i++) fh.add(files[i]);

    // delete unused temporary files and orphaned index/gap files
    boolean deletions = false;
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".tmp")) {
            FileUtils.deletedelete(new File(heapLocation, files[i]));
            deletions = true;
        }
        if (files[i].endsWith(".idx") || files[i].endsWith(".gap")) {
            // strip the index-file suffix to get the name of the blob file it belongs to
            // (assumes a fixed 17-character suffix -- TODO confirm against HeapWriter naming)
            final String s = files[i].substring(0, files[i].length() - 17);
            if (!fh.contains(s)) {
                FileUtils.deletedelete(new File(heapLocation, files[i]));
                deletions = true;
            }
        }
    }
    if (deletions) files = heapLocation.list(); // make a fresh list

    // migrate old file names (second-resolution, no prefix) to the current scheme
    Date d;
    long time;
    deletions = false;
    for (int i = 0; i < files.length; i++) {
        if (files[i].length() >= 19 && files[i].endsWith(".blob")) {
            try {
                d = DateFormatter.parseShortSecond(files[i].substring(0, 14));
                new File(heapLocation, files[i]).renameTo(newBLOB(d));
                deletions = true;
            } catch (ParseException e) { continue; }
        }
    }
    if (deletions) files = heapLocation.list(); // make a fresh list

    // find maximum time: the file with this time will be given a write buffer
    final TreeMap<Long, blobItem> sortedItems = new TreeMap<Long, blobItem>();
    BLOB oneBlob;
    File f;
    long maxtime = 0;
    for (int i = 0; i < files.length; i++) {
        if (files[i].length() >= 22 && files[i].startsWith(prefix) && files[i].endsWith(".blob")) {
            try {
                d = DateFormatter.parseShortMilliSecond(files[i].substring(prefix.length() + 1, prefix.length() + 18));
                time = d.getTime();
                if (time > maxtime) maxtime = time;
            } catch (ParseException e) { continue; }
        }
    }

    // open all blob files; only the newest one is opened writable with a buffer
    for (int i = 0; i < files.length; i++) {
        if (files[i].length() >= 22 && files[i].startsWith(prefix) && files[i].endsWith(".blob")) {
            try {
                d = DateFormatter.parseShortMilliSecond(files[i].substring(prefix.length() + 1, prefix.length() + 18));
                f = new File(heapLocation, files[i]);
                time = d.getTime();
                oneBlob = (time == maxtime) ? new BLOBHeap(f, keylength, ordering, buffersize) : new BLOBHeapModifier(f, keylength, ordering);
                sortedItems.put(Long.valueOf(time), new blobItem(d, f, oneBlob));
            } catch (ParseException e) { continue; }
        }
    }

    // read the blob tree in a sorted way and write them into an array
    blobs = new CopyOnWriteArrayList<blobItem>();
    for (final blobItem bi : sortedItems.values()) {
        blobs.add(bi);
    }
}
2009-01-21 19:23:37 +01:00
/ * *
* add a blob file to the array .
* note that this file must be generated with a file name from newBLOB ( )
* @param location
* @throws IOException
* /
2009-04-03 14:42:24 +02:00
public synchronized void mountBLOB ( File location , boolean full ) throws IOException {
2009-01-21 19:23:37 +01:00
Date d ;
try {
2009-04-02 17:08:56 +02:00
d = DateFormatter . parseShortMilliSecond ( location . getName ( ) . substring ( prefix . length ( ) + 1 , prefix . length ( ) + 18 ) ) ;
2009-01-21 19:23:37 +01:00
} catch ( ParseException e ) {
throw new IOException ( " date parse problem with file " + location . toString ( ) + " : " + e . getMessage ( ) ) ;
}
2009-04-03 14:42:24 +02:00
BLOB oneBlob = ( full & & buffersize > 0 ) ? new BLOBHeap ( location , keylength , ordering , buffersize ) : new BLOBHeapModifier ( location , keylength , ordering ) ;
2009-01-21 19:23:37 +01:00
blobs . add ( new blobItem ( d , location , oneBlob ) ) ;
}
2009-03-18 21:21:19 +01:00
public synchronized void unmountBLOB ( File location , boolean writeIDX ) {
2009-01-21 19:23:37 +01:00
blobItem b ;
2009-03-31 19:03:13 +02:00
for ( int i = 0 ; i < this . blobs . size ( ) ; i + + ) {
b = this . blobs . get ( i ) ;
2009-05-04 00:54:47 +02:00
if ( b . location . getAbsolutePath ( ) . equals ( location . getAbsolutePath ( ) ) ) {
2009-03-31 19:03:13 +02:00
this . blobs . remove ( i ) ;
2009-03-18 17:14:31 +01:00
b . blob . close ( writeIDX ) ;
2009-03-30 21:05:08 +02:00
b . blob = null ;
b . location = null ;
2009-01-21 19:23:37 +01:00
return ;
}
}
2009-05-04 00:54:47 +02:00
Log . logSevere ( " BLOBArray " , " file " + location + " cannot be unmounted. The file " + ( ( location . exists ( ) ) ? " exists. " : " does not exist. " ) ) ;
2009-01-21 19:23:37 +01:00
}
2009-03-30 21:05:08 +02:00
// remove the blob at the given index, close it (without writing an index file)
// and hand back its file location
private File unmount(int idx) {
    final blobItem b = this.blobs.remove(idx);
    b.blob.close(false);
    b.blob = null;
    final File f = b.location;
    b.location = null;
    return f;
}
2009-03-31 18:49:02 +02:00
public synchronized File [ ] unmountBestMatch ( double maxq , long maxResultSize ) {
2009-05-04 00:54:47 +02:00
if ( this . blobs . size ( ) < 2 ) return null ;
2009-03-31 18:49:02 +02:00
long l , r ;
2009-05-04 00:54:47 +02:00
File lf , rf ;
2009-03-31 18:49:02 +02:00
double min = Double . MAX_VALUE ;
2009-05-04 00:54:47 +02:00
File [ ] bestMatch = new File [ 2 ] ;
2009-03-31 18:49:02 +02:00
maxResultSize = maxResultSize > > 1 ;
2009-03-30 21:05:08 +02:00
for ( int i = 0 ; i < this . blobs . size ( ) - 1 ; i + + ) {
for ( int j = i + 1 ; j < this . blobs . size ( ) ; j + + ) {
2009-05-04 00:54:47 +02:00
lf = this . blobs . get ( i ) . location ;
rf = this . blobs . get ( j ) . location ;
l = 1 + ( lf . length ( ) > > 1 ) ;
r = 1 + ( rf . length ( ) > > 1 ) ;
2009-03-31 18:49:02 +02:00
if ( l + r > maxResultSize ) continue ;
2009-05-04 00:54:47 +02:00
double q = Math . max ( ( double ) l , ( double ) r ) / Math . min ( ( double ) l , ( double ) r ) ;
2009-03-30 21:05:08 +02:00
if ( q < min ) {
min = q ;
2009-05-04 00:54:47 +02:00
bestMatch [ 0 ] = lf ;
bestMatch [ 1 ] = rf ;
2009-03-30 21:05:08 +02:00
}
}
}
if ( min > maxq ) return null ;
2009-05-04 00:54:47 +02:00
unmountBLOB ( bestMatch [ 1 ] , false ) ;
unmountBLOB ( bestMatch [ 0 ] , false ) ;
return bestMatch ;
2009-03-30 21:05:08 +02:00
}
2009-03-31 18:49:02 +02:00
public synchronized File [ ] unmountSmallest ( long maxResultSize ) {
2009-05-04 00:54:47 +02:00
if ( this . blobs . size ( ) < 2 ) return null ;
File f0 = smallestBLOB ( null , maxResultSize ) ;
2009-03-31 18:49:02 +02:00
if ( f0 = = null ) return null ;
2009-03-31 21:17:45 +02:00
File f1 = smallestBLOB ( f0 , maxResultSize - f0 . length ( ) ) ;
2009-03-31 18:49:02 +02:00
if ( f1 = = null ) return null ;
unmountBLOB ( f0 , false ) ;
unmountBLOB ( f1 , false ) ;
return new File [ ] { f0 , f1 } ;
}
2009-03-31 21:17:45 +02:00
public synchronized File unmountSmallestBLOB ( long maxResultSize ) {
return smallestBLOB ( null , maxResultSize ) ;
2009-03-31 18:49:02 +02:00
}
2009-03-31 21:17:45 +02:00
public synchronized File smallestBLOB ( File excluding , long maxsize ) {
2009-01-21 19:23:37 +01:00
if ( this . blobs . size ( ) = = 0 ) return null ;
2009-05-04 00:54:47 +02:00
File bestFile = null ;
2009-03-20 15:54:37 +01:00
long smallest = Long . MAX_VALUE ;
2009-05-04 00:54:47 +02:00
File f = null ;
2009-03-20 15:54:37 +01:00
for ( int i = 0 ; i < this . blobs . size ( ) ; i + + ) {
2009-05-04 00:54:47 +02:00
f = this . blobs . get ( i ) . location ;
if ( excluding ! = null & & f . getAbsolutePath ( ) . equals ( excluding . getAbsolutePath ( ) ) ) continue ;
if ( f . length ( ) < smallest ) {
smallest = f . length ( ) ;
bestFile = f ;
2009-03-20 15:54:37 +01:00
}
}
2009-03-31 21:17:45 +02:00
if ( smallest > maxsize ) return null ;
2009-05-04 00:54:47 +02:00
return bestFile ;
2009-03-20 15:54:37 +01:00
}
public synchronized File unmountOldestBLOB ( boolean smallestFromFirst2 ) {
if ( this . blobs . size ( ) = = 0 ) return null ;
int idx = 0 ;
if ( smallestFromFirst2 & & this . blobs . get ( 1 ) . location . length ( ) < this . blobs . get ( 0 ) . location . length ( ) ) idx = 1 ;
2009-03-30 21:05:08 +02:00
return unmount ( idx ) ;
2009-03-20 15:54:37 +01:00
}
public synchronized File unmountSimilarSizeBLOB ( long otherSize ) {
if ( this . blobs . size ( ) = = 0 | | otherSize = = 0 ) return null ;
blobItem b ;
double delta , bestDelta = Double . MAX_VALUE ;
int bestIndex = - 1 ;
for ( int i = 0 ; i < this . blobs . size ( ) ; i + + ) {
b = this . blobs . get ( i ) ;
if ( b . location . length ( ) = = 0 ) continue ;
delta = ( ( double ) b . location . length ( ) ) / ( ( double ) otherSize ) ;
if ( delta < 1 . 0 ) delta = 1 . 0 / delta ;
if ( delta < bestDelta ) {
bestDelta = delta ;
bestIndex = i ;
}
}
2009-03-30 21:05:08 +02:00
return unmount ( bestIndex ) ;
2009-01-21 19:23:37 +01:00
}
/ * *
* return the number of BLOB files in this array
* @return
* /
2009-03-18 21:21:19 +01:00
public synchronized int entries ( ) {
2009-03-31 21:17:45 +02:00
return ( this . blobs = = null ) ? 0 : this . blobs . size ( ) ;
2009-01-21 19:23:37 +01:00
}
/ * *
* generate a new BLOB file name with a given date .
* This method is needed to generate a file name that matches to the name structure that is needed for parts of the array
* @param creation
* @return
* /
2009-03-18 21:21:19 +01:00
public synchronized File newBLOB ( Date creation ) {
2009-04-02 17:08:56 +02:00
//return new File(heapLocation, DateFormatter.formatShortSecond(creation) + "." + blobSalt + ".blob");
return new File ( heapLocation , prefix + " . " + DateFormatter . formatShortMilliSecond ( creation ) + " .blob " ) ;
2009-01-21 19:23:37 +01:00
}
2008-12-08 01:17:45 +01:00
/** the array is identified by the name of the directory that holds its files */
public String name() {
    return this.heapLocation.getName();
}
2008-10-16 23:24:09 +02:00
public void setMaxAge ( long maxAge ) {
this . repositoryAgeMax = maxAge ;
2008-10-19 20:10:42 +02:00
this . fileAgeLimit = Math . min ( oneMonth , maxAge / 10 ) ;
2008-10-16 23:24:09 +02:00
}
public void setMaxSize ( long maxSize ) {
this . repositorySizeMax = maxSize ;
2009-05-25 08:59:21 +02:00
this . fileSizeLimit = Math . min ( ( long ) Integer . MAX_VALUE , maxSize / 10L ) ;
2008-10-16 23:24:09 +02:00
}
private void executeLimits ( ) {
// check if storage limits are reached and execute consequences
if ( blobs . size ( ) = = 0 ) return ;
// age limit:
while ( blobs . size ( ) > 0 & & System . currentTimeMillis ( ) - blobs . get ( 0 ) . creation . getTime ( ) - this . fileAgeLimit > this . repositoryAgeMax ) {
// too old
blobItem oldestBLOB = blobs . remove ( 0 ) ;
2009-03-18 17:14:31 +01:00
oldestBLOB . blob . close ( false ) ;
2009-03-30 17:31:25 +02:00
oldestBLOB . blob = null ;
FileUtils . deletedelete ( oldestBLOB . location ) ;
2008-10-16 23:24:09 +02:00
}
// size limit
while ( blobs . size ( ) > 0 & & length ( ) > this . repositorySizeMax ) {
// too large
blobItem oldestBLOB = blobs . remove ( 0 ) ;
2009-03-18 17:14:31 +01:00
oldestBLOB . blob . close ( false ) ;
2009-03-30 17:31:25 +02:00
FileUtils . deletedelete ( oldestBLOB . location ) ;
2008-10-16 23:24:09 +02:00
}
}
/ *
2009-03-18 21:21:19 +01:00
* return the size of the repository ( in bytes )
2008-10-16 23:24:09 +02:00
* /
2009-03-18 21:21:19 +01:00
public synchronized long length ( ) {
2008-10-16 23:24:09 +02:00
long s = 0 ;
for ( int i = 0 ; i < blobs . size ( ) ; i + + ) s + = blobs . get ( i ) . location . length ( ) ;
return s ;
}
2009-01-30 16:33:00 +01:00
public ByteOrder ordering ( ) {
2008-10-20 00:30:44 +02:00
return this . ordering ;
}
2008-08-19 16:10:40 +02:00
private class blobItem {
Date creation ;
File location ;
2009-01-30 23:08:08 +01:00
BLOB blob ;
public blobItem ( Date creation , File location , BLOB blob ) {
2008-08-19 16:10:40 +02:00
this . creation = creation ;
this . location = location ;
this . blob = blob ;
}
2009-01-06 10:38:08 +01:00
public blobItem ( int buffer ) throws IOException {
2008-08-19 16:10:40 +02:00
// make a new blob file and assign it in this item
this . creation = new Date ( ) ;
2009-01-06 10:38:08 +01:00
this . location = newBLOB ( this . creation ) ;
2009-01-30 23:08:08 +01:00
this . blob = ( buffer = = 0 ) ? new BLOBHeapModifier ( location , keylength , ordering ) : new BLOBHeap ( location , keylength , ordering , buffer ) ;
2008-08-19 16:10:40 +02:00
}
}
2009-01-21 19:23:37 +01:00
2008-08-19 16:10:40 +02:00
/ * *
* ask for the length of the primary key
* @return the length of the key
* /
public int keylength ( ) {
return this . keylength ;
}
/ * *
* clears the content of the database
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized void clear ( ) throws IOException {
2009-03-29 23:28:14 +02:00
for ( blobItem bi : blobs ) {
bi . blob . clear ( ) ;
bi . blob . close ( false ) ;
2009-03-30 17:31:25 +02:00
FileUtils . deletedelete ( bi . location ) ;
2009-03-29 23:28:14 +02:00
}
2008-08-19 16:10:40 +02:00
blobs . clear ( ) ;
}
/ * *
2009-03-18 21:21:19 +01:00
* ask for the number of blob entries
2008-08-19 16:10:40 +02:00
* @return the number of entries in the table
* /
2009-03-18 21:21:19 +01:00
public synchronized int size ( ) {
2008-08-19 16:10:40 +02:00
int s = 0 ;
for ( blobItem bi : blobs ) s + = bi . blob . size ( ) ;
return s ;
}
/ * *
* iterator over all keys
* @param up
* @param rotating
* @return
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized CloneableIterator < byte [ ] > keys ( boolean up , boolean rotating ) throws IOException {
2008-08-19 16:10:40 +02:00
assert rotating = false ;
2009-01-30 23:08:08 +01:00
final List < CloneableIterator < byte [ ] > > c = new ArrayList < CloneableIterator < byte [ ] > > ( blobs . size ( ) ) ;
2008-08-19 16:10:40 +02:00
final Iterator < blobItem > i = blobs . iterator ( ) ;
while ( i . hasNext ( ) ) {
c . add ( i . next ( ) . blob . keys ( up , rotating ) ) ;
}
2009-01-30 23:44:20 +01:00
return MergeIterator . cascade ( c , this . ordering , MergeIterator . simpleMerge , up ) ;
2008-08-19 16:10:40 +02:00
}
/ * *
* iterate over all keys
* @param up
* @param firstKey
* @return
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized CloneableIterator < byte [ ] > keys ( boolean up , byte [ ] firstKey ) throws IOException {
2009-01-30 23:08:08 +01:00
final List < CloneableIterator < byte [ ] > > c = new ArrayList < CloneableIterator < byte [ ] > > ( blobs . size ( ) ) ;
2008-08-19 16:10:40 +02:00
final Iterator < blobItem > i = blobs . iterator ( ) ;
while ( i . hasNext ( ) ) {
c . add ( i . next ( ) . blob . keys ( up , firstKey ) ) ;
}
2009-01-30 23:44:20 +01:00
return MergeIterator . cascade ( c , this . ordering , MergeIterator . simpleMerge , up ) ;
2008-08-19 16:10:40 +02:00
}
/ * *
* check if a specific key is in the database
* @param key the primary key
* @return
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized boolean has ( byte [ ] key ) {
2008-08-19 16:10:40 +02:00
for ( blobItem bi : blobs ) if ( bi . blob . has ( key ) ) return true ;
return false ;
}
/ * *
* retrieve the whole BLOB from the table
* @param key the primary key
* @return
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized byte [ ] get ( byte [ ] key ) throws IOException {
2008-08-19 16:10:40 +02:00
byte [ ] b ;
for ( blobItem bi : blobs ) {
b = bi . blob . get ( key ) ;
if ( b ! = null ) return b ;
}
return null ;
}
2009-01-06 10:38:08 +01:00
/ * *
* get all BLOBs in the array .
* this is useful when it is not clear if an entry is unique in all BLOBs in this array .
* @param key
* @return
* @throws IOException
* /
2009-06-02 18:53:45 +02:00
public synchronized Iterable < byte [ ] > getAll ( byte [ ] key ) throws IOException {
/ *
2009-01-06 10:38:08 +01:00
byte [ ] b ;
ArrayList < byte [ ] > l = new ArrayList < byte [ ] > ( blobs . size ( ) ) ;
for ( blobItem bi : blobs ) {
b = bi . blob . get ( key ) ;
if ( b ! = null ) l . add ( b ) ;
}
return l ;
2009-06-02 18:53:45 +02:00
* /
return new BlobValues ( key ) ;
}
public class BlobValues implements Iterator < byte [ ] > , Iterable < byte [ ] > {
private Iterator < blobItem > bii ;
private byte [ ] next ;
private byte [ ] key ;
public BlobValues ( byte [ ] key ) {
this . bii = blobs . iterator ( ) ;
this . key = key ;
this . next = null ;
next0 ( ) ;
}
private void next0 ( ) {
while ( this . bii . hasNext ( ) ) {
BLOB b = this . bii . next ( ) . blob ;
try {
this . next = b . get ( key ) ;
if ( this . next ! = null ) return ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
this . next = null ;
return ;
}
}
this . next = null ;
}
public Iterator < byte [ ] > iterator ( ) {
return this ;
}
public boolean hasNext ( ) {
return this . next ! = null ;
}
public byte [ ] next ( ) {
byte [ ] n = this . next ;
next0 ( ) ;
return n ;
}
public void remove ( ) {
throw new UnsupportedOperationException ( " no remove in BlobValues " ) ;
}
2009-01-06 10:38:08 +01:00
}
2008-10-16 23:24:09 +02:00
/ * *
* retrieve the size of the BLOB
* @param key
* @return the size of the BLOB or - 1 if the BLOB does not exist
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized long length ( byte [ ] key ) throws IOException {
2008-10-16 23:24:09 +02:00
long l ;
for ( blobItem bi : blobs ) {
l = bi . blob . length ( key ) ;
if ( l > = 0 ) return l ;
}
return - 1 ;
}
2008-08-19 16:10:40 +02:00
/ * *
* write a whole byte array as BLOB to the table
* @param key the primary key
* @param b
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized void put ( byte [ ] key , byte [ ] b ) throws IOException {
2008-08-19 16:10:40 +02:00
blobItem bi = ( blobs . size ( ) = = 0 ) ? null : blobs . get ( blobs . size ( ) - 1 ) ;
2008-10-16 23:24:09 +02:00
if ( bi = = null )
System . out . println ( " bi == null " ) ;
else if ( System . currentTimeMillis ( ) - bi . creation . getTime ( ) > this . fileAgeLimit )
System . out . println ( " System.currentTimeMillis() - bi.creation.getTime() > this.maxage " ) ;
else if ( bi . location . length ( ) > this . fileSizeLimit )
System . out . println ( " bi.location.length() > this.maxsize " ) ;
if ( ( bi = = null ) | | ( System . currentTimeMillis ( ) - bi . creation . getTime ( ) > this . fileAgeLimit ) | | ( bi . location . length ( ) > this . fileSizeLimit ) ) {
2008-08-19 16:10:40 +02:00
// add a new blob to the array
2009-01-06 10:38:08 +01:00
bi = new blobItem ( buffersize ) ;
2008-08-19 16:10:40 +02:00
blobs . add ( bi ) ;
}
2009-01-30 23:08:08 +01:00
assert bi . blob instanceof BLOBHeap ;
2008-08-19 16:10:40 +02:00
bi . blob . put ( key , b ) ;
2008-10-16 23:24:09 +02:00
executeLimits ( ) ;
2008-08-19 16:10:40 +02:00
}
2009-01-21 19:23:37 +01:00
/ * *
* replace a BLOB entry with another which must be smaller or same size
* @param key the primary key
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized int replace ( byte [ ] key , Rewriter rewriter ) throws IOException {
2009-01-21 19:23:37 +01:00
int d = 0 ;
for ( blobItem bi : blobs ) {
d + = bi . blob . replace ( key , rewriter ) ;
}
return d ;
}
2008-08-19 16:10:40 +02:00
/ * *
* remove a BLOB
* @param key the primary key
* @throws IOException
* /
2009-03-18 21:21:19 +01:00
public synchronized void remove ( byte [ ] key ) throws IOException {
2008-08-19 16:10:40 +02:00
for ( blobItem bi : blobs ) bi . blob . remove ( key ) ;
}
/ * *
* close the BLOB
* /
2009-03-18 21:21:19 +01:00
public synchronized void close ( boolean writeIDX ) {
2009-03-18 17:14:31 +01:00
for ( blobItem bi : blobs ) bi . blob . close ( writeIDX ) ;
2008-08-19 16:10:40 +02:00
blobs . clear ( ) ;
blobs = null ;
}
2009-05-29 12:03:35 +02:00
public File mergeMount ( File f1 , File f2 , ReferenceFactory < ? extends Reference > factory , Row payloadrow , File newFile , int writeBuffer ) throws IOException {
2009-04-02 15:26:47 +02:00
Log . logInfo ( " BLOBArray " , " merging " + f1 . getName ( ) + " with " + f2 . getName ( ) ) ;
2009-05-04 10:29:44 +02:00
File resultFile = mergeWorker ( factory , this . keylength , this . ordering , f1 , f2 , payloadrow , newFile , writeBuffer ) ;
2009-04-02 15:26:47 +02:00
if ( resultFile = = null ) return null ;
2009-04-03 14:42:24 +02:00
mountBLOB ( resultFile , false ) ;
2009-04-02 15:26:47 +02:00
Log . logInfo ( " BLOBArray " , " merged " + f1 . getName ( ) + " with " + f2 . getName ( ) + " into " + resultFile ) ;
return resultFile ;
}
2009-05-04 10:29:44 +02:00
private static < ReferenceType extends Reference > File mergeWorker ( ReferenceFactory < ReferenceType > factory , int keylength , ByteOrder order , File f1 , File f2 , Row payloadrow , File newFile , int writeBuffer ) throws IOException {
2009-04-02 15:26:47 +02:00
// iterate both files and write a new one
2009-04-15 08:34:27 +02:00
CloneableIterator < ReferenceContainer < ReferenceType > > i1 = new blobFileEntries < ReferenceType > ( f1 , factory , payloadrow ) ;
CloneableIterator < ReferenceContainer < ReferenceType > > i2 = new blobFileEntries < ReferenceType > ( f2 , factory , payloadrow ) ;
2009-04-02 15:26:47 +02:00
if ( ! i1 . hasNext ( ) ) {
if ( i2 . hasNext ( ) ) {
FileUtils . deletedelete ( f1 ) ;
if ( f2 . renameTo ( newFile ) ) return newFile ;
return f2 ;
} else {
FileUtils . deletedelete ( f1 ) ;
FileUtils . deletedelete ( f2 ) ;
return null ;
}
} else if ( ! i2 . hasNext ( ) ) {
FileUtils . deletedelete ( f2 ) ;
if ( f1 . renameTo ( newFile ) ) return newFile ;
return f1 ;
}
assert i1 . hasNext ( ) ;
assert i2 . hasNext ( ) ;
File tmpFile = new File ( newFile . getParentFile ( ) , newFile . getName ( ) + " .tmp " ) ;
2009-05-04 10:29:44 +02:00
HeapWriter writer = new HeapWriter ( tmpFile , newFile , keylength , order , writeBuffer ) ;
2009-04-15 08:34:27 +02:00
merge ( i1 , i2 , order , writer ) ;
2009-04-02 15:26:47 +02:00
try {
writer . close ( true ) ;
// we don't need the old files any more
FileUtils . deletedelete ( f1 ) ;
FileUtils . deletedelete ( f2 ) ;
return newFile ;
} catch ( IOException e ) {
FileUtils . deletedelete ( tmpFile ) ;
FileUtils . deletedelete ( newFile ) ;
e . printStackTrace ( ) ;
return null ;
}
}
2009-04-15 08:34:27 +02:00
private static < ReferenceType extends Reference > void merge ( CloneableIterator < ReferenceContainer < ReferenceType > > i1 , CloneableIterator < ReferenceContainer < ReferenceType > > i2 , ByteOrder ordering , HeapWriter writer ) throws IOException {
2009-04-02 15:26:47 +02:00
assert i1 . hasNext ( ) ;
assert i2 . hasNext ( ) ;
2009-04-15 08:34:27 +02:00
ReferenceContainer < ReferenceType > c1 , c2 , c1o , c2o ;
2009-04-02 15:26:47 +02:00
c1 = i1 . next ( ) ;
c2 = i2 . next ( ) ;
int e ;
while ( true ) {
assert c1 ! = null ;
assert c2 ! = null ;
2009-04-16 17:29:00 +02:00
e = ordering . compare ( c1 . getTermHash ( ) , c2 . getTermHash ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( e < 0 ) {
2009-04-16 17:29:00 +02:00
writer . add ( c1 . getTermHash ( ) , c1 . exportCollection ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( i1 . hasNext ( ) ) {
c1o = c1 ;
c1 = i1 . next ( ) ;
2009-04-16 17:29:00 +02:00
assert ordering . compare ( c1 . getTermHash ( ) , c1o . getTermHash ( ) ) > 0 ;
2009-04-02 15:26:47 +02:00
continue ;
}
break ;
}
if ( e > 0 ) {
2009-04-16 17:29:00 +02:00
writer . add ( c2 . getTermHash ( ) , c2 . exportCollection ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( i2 . hasNext ( ) ) {
c2o = c2 ;
c2 = i2 . next ( ) ;
2009-04-16 17:29:00 +02:00
assert ordering . compare ( c2 . getTermHash ( ) , c2o . getTermHash ( ) ) > 0 ;
2009-04-02 15:26:47 +02:00
continue ;
}
break ;
}
assert e = = 0 ;
// merge the entries
2009-04-16 17:29:00 +02:00
writer . add ( c1 . getTermHash ( ) , ( c1 . merge ( c2 ) ) . exportCollection ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( i1 . hasNext ( ) & & i2 . hasNext ( ) ) {
c1 = i1 . next ( ) ;
c2 = i2 . next ( ) ;
continue ;
}
if ( i1 . hasNext ( ) ) c1 = i1 . next ( ) ;
if ( i2 . hasNext ( ) ) c2 = i2 . next ( ) ;
break ;
}
// catch up remaining entries
assert ! ( i1 . hasNext ( ) & & i2 . hasNext ( ) ) ;
while ( i1 . hasNext ( ) ) {
//System.out.println("FLUSH REMAINING 1: " + c1.getWordHash());
2009-04-16 17:29:00 +02:00
writer . add ( c1 . getTermHash ( ) , c1 . exportCollection ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( i1 . hasNext ( ) ) {
c1o = c1 ;
c1 = i1 . next ( ) ;
2009-04-16 17:29:00 +02:00
assert ordering . compare ( c1 . getTermHash ( ) , c1o . getTermHash ( ) ) > 0 ;
2009-04-02 15:26:47 +02:00
continue ;
}
break ;
}
while ( i2 . hasNext ( ) ) {
//System.out.println("FLUSH REMAINING 2: " + c2.getWordHash());
2009-04-16 17:29:00 +02:00
writer . add ( c2 . getTermHash ( ) , c2 . exportCollection ( ) ) ;
2009-04-02 15:26:47 +02:00
if ( i2 . hasNext ( ) ) {
c2o = c2 ;
c2 = i2 . next ( ) ;
2009-04-16 17:29:00 +02:00
assert ordering . compare ( c2 . getTermHash ( ) , c2o . getTermHash ( ) ) > 0 ;
2009-04-02 15:26:47 +02:00
continue ;
}
break ;
}
// finished with writing
}
2008-10-16 23:24:09 +02:00
public static void main ( final String [ ] args ) {
final File f = new File ( " /Users/admin/blobarraytest " ) ;
try {
//f.delete();
2009-01-30 23:08:08 +01:00
final BLOBArray heap = new BLOBArray ( f , " test " , 12 , NaturalOrder . naturalOrder , 512 * 1024 ) ;
2008-10-16 23:24:09 +02:00
heap . put ( " aaaaaaaaaaaa " . getBytes ( ) , " eins zwei drei " . getBytes ( ) ) ;
heap . put ( " aaaaaaaaaaab " . getBytes ( ) , " vier fuenf sechs " . getBytes ( ) ) ;
heap . put ( " aaaaaaaaaaac " . getBytes ( ) , " sieben acht neun " . getBytes ( ) ) ;
heap . put ( " aaaaaaaaaaad " . getBytes ( ) , " zehn elf zwoelf " . getBytes ( ) ) ;
// iterate over keys
Iterator < byte [ ] > i = heap . keys ( true , false ) ;
while ( i . hasNext ( ) ) {
System . out . println ( " key_b: " + new String ( i . next ( ) ) ) ;
}
heap . remove ( " aaaaaaaaaaab " . getBytes ( ) ) ;
heap . remove ( " aaaaaaaaaaac " . getBytes ( ) ) ;
heap . put ( " aaaaaaaaaaaX " . getBytes ( ) , " WXYZ " . getBytes ( ) ) ;
2009-03-18 17:14:31 +01:00
heap . close ( true ) ;
2008-10-16 23:24:09 +02:00
} catch ( final IOException e ) {
e . printStackTrace ( ) ;
}
}
2008-08-19 16:10:40 +02:00
}