2009-01-01 23:31:16 +01:00
// kelondroBLOBHeapWriter.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.12.2008 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-01-30 23:08:08 +01:00
package de.anomic.kelondro.blob ;
2009-01-01 23:31:16 +01:00
import java.io.BufferedOutputStream ;
import java.io.DataOutputStream ;
import java.io.File ;
import java.io.FileOutputStream ;
import java.io.IOException ;
2009-03-02 11:00:32 +01:00
import de.anomic.kelondro.index.LongHandleIndex ;
2009-01-30 23:08:08 +01:00
import de.anomic.kelondro.order.ByteOrder ;
import de.anomic.kelondro.order.Digest ;
2009-03-30 17:31:25 +02:00
import de.anomic.kelondro.util.FileUtils ;
2009-01-31 00:33:47 +01:00
import de.anomic.kelondro.util.Log ;
2009-01-01 23:31:16 +01:00
2009-01-30 23:08:08 +01:00
public final class HeapWriter {
2009-01-01 23:31:16 +01:00
2009-03-18 17:14:31 +01:00
private int keylength ; // the length of the primary key
2009-03-02 11:00:32 +01:00
private LongHandleIndex index ; // key/seek relation for used records
2009-03-18 17:14:31 +01:00
private final File heapFile ; // the file of the heap
private DataOutputStream os ; // the output stream where the BLOB is written
private long seek ; // the current write position
//private HashSet<String> doublecheck;// only for testing
2009-01-01 23:31:16 +01:00
/ *
* This class implements a BLOB management based on a sequence of records
* The data structure is :
* file : = = record *
* record : = = reclen key blob
* reclen : = = < 4 byte integer = = length of key and blob >
* key : = = < bytes as defined with keylen , if first byte is zero then record is empty >
* blob : = = < bytes of length reclen - keylen >
* that means that each record has the size reclen + 4
*
* Because the blob sizes are stored with integers , one entry may not exceed 2GB
*
* With this class a BLOB file can only be written .
* To read them , use a kelondroBLOBHeapReader .
* A BLOBHeap can be also read and write in random access mode with kelondroBLOBHeap .
* /
/ * *
* create a heap file : a arbitrary number of BLOBs , indexed by an access key
* The heap file will be indexed upon initialization .
* @param heapFile
* @param keylength
* @param ordering
* @throws IOException
* /
2009-01-30 23:08:08 +01:00
public HeapWriter ( final File heapFile , final int keylength , final ByteOrder ordering ) throws IOException {
2009-01-01 23:31:16 +01:00
this . heapFile = heapFile ;
this . keylength = keylength ;
2009-03-13 11:07:04 +01:00
this . index = new LongHandleIndex ( keylength , ordering , 10 , 100000 ) ;
2009-03-18 23:19:08 +01:00
this . os = new DataOutputStream ( new BufferedOutputStream ( new FileOutputStream ( heapFile ) , 8 * 1024 * 1024 ) ) ;
2009-03-18 17:14:31 +01:00
//this.doublecheck = new HashSet<String>();
2009-01-01 23:31:16 +01:00
this . seek = 0 ;
}
/ * *
* add a BLOB to the heap : this adds the blob always to the end of the file
* newly added heap entries must have keys that have not been added before
* @param key
* @param blob
* @throws IOException
* /
2009-03-18 17:14:31 +01:00
public synchronized void add ( final byte [ ] key , final byte [ ] blob ) throws IOException {
//System.out.println("HeapWriter.add: " + new String(key));
2009-01-01 23:31:16 +01:00
assert blob . length > 0 ;
assert key . length = = this . keylength ;
assert index . row ( ) . primaryKeyLength = = key . length : index . row ( ) . primaryKeyLength + " != " + key . length ;
2009-03-18 17:14:31 +01:00
assert index . get ( key ) < 0 : " index.get(key) = " + index . get ( key ) + " , index.size() = " + index . size ( ) + " , file.length() = " + this . heapFile . length ( ) + " , key = " + new String ( key ) ; // must not occur before
2009-01-01 23:31:16 +01:00
if ( ( blob = = null ) | | ( blob . length = = 0 ) ) return ;
int chunkl = key . length + blob . length ;
os . writeInt ( chunkl ) ;
os . write ( key ) ;
os . write ( blob ) ;
2009-03-08 22:37:17 +01:00
index . putUnique ( key , seek ) ;
2009-03-18 17:14:31 +01:00
//assert (this.doublecheck.add(new String(key))) : "doublecheck failed for " + new String(key);
2009-01-01 23:31:16 +01:00
this . seek + = chunkl + 4 ;
}
protected static File fingerprintIndexFile ( File f ) {
return new File ( f . getParentFile ( ) , f . getName ( ) + " . " + fingerprintFileHash ( f ) + " .idx " ) ;
}
protected static File fingerprintGapFile ( File f ) {
return new File ( f . getParentFile ( ) , f . getName ( ) + " . " + fingerprintFileHash ( f ) + " .gap " ) ;
}
protected static String fingerprintFileHash ( File f ) {
2009-01-30 16:33:00 +01:00
return Digest . fastFingerprintB64 ( f , false ) . substring ( 0 , 12 ) ;
2009-01-01 23:31:16 +01:00
}
public static void deleteAllFingerprints ( File f ) {
File d = f . getParentFile ( ) ;
String n = f . getName ( ) ;
String [ ] l = d . list ( ) ;
for ( int i = 0 ; i < l . length ; i + + ) {
2009-03-30 17:31:25 +02:00
if ( l [ i ] . startsWith ( n ) & & ( l [ i ] . endsWith ( " .idx " ) | | l [ i ] . endsWith ( " .gap " ) ) ) FileUtils . deletedelete ( new File ( d , l [ i ] ) ) ;
2009-01-01 23:31:16 +01:00
}
}
/ * *
* close the BLOB table
* @throws
* /
2009-03-18 17:14:31 +01:00
public synchronized void close ( boolean writeIDX ) {
2009-01-01 23:31:16 +01:00
try {
os . flush ( ) ;
os . close ( ) ;
} catch ( final IOException e ) {
e . printStackTrace ( ) ;
}
os = null ;
2009-03-18 17:14:31 +01:00
if ( writeIDX & & index . size ( ) > 3 ) {
2009-01-01 23:31:16 +01:00
// now we can create a dump of the index and the gap information
// to speed up the next start
try {
long start = System . currentTimeMillis ( ) ;
2009-01-30 23:08:08 +01:00
new Gap ( ) . dump ( fingerprintGapFile ( this . heapFile ) ) ;
2009-01-01 23:31:16 +01:00
index . dump ( fingerprintIndexFile ( this . heapFile ) ) ;
2009-01-31 00:33:47 +01:00
Log . logInfo ( " kelondroBLOBHeapWriter " , " wrote a dump for the " + this . index . size ( ) + " index entries of " + heapFile . getName ( ) + " in " + ( System . currentTimeMillis ( ) - start ) + " milliseconds. " ) ;
2009-01-01 23:31:16 +01:00
index . close ( ) ;
index = null ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
}
} else {
// this is small.. just free resources, do not write index
index . close ( ) ;
index = null ;
}
}
}