2009-01-01 23:31:16 +01:00
// kelondroBLOBHeapWriter.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.12.2008 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-10-10 02:43:25 +02:00
package net.yacy.kelondro.blob ;
2009-01-01 23:31:16 +01:00
import java.io.BufferedOutputStream ;
import java.io.DataOutputStream ;
import java.io.File ;
import java.io.FileOutputStream ;
import java.io.IOException ;
2009-10-10 01:32:08 +02:00
import net.yacy.kelondro.index.HandleMap ;
2009-12-10 00:27:26 +01:00
import net.yacy.kelondro.index.RowSpaceExceededException ;
2009-10-10 01:13:30 +02:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.ByteOrder ;
import net.yacy.kelondro.order.Digest ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.FileUtils ;
2009-10-10 01:13:30 +02:00
2009-01-01 23:31:16 +01:00
2009-01-30 23:08:08 +01:00
public final class HeapWriter {
2009-01-01 23:31:16 +01:00
2009-04-01 14:39:11 +02:00
private int keylength ; // the length of the primary key
2009-09-09 22:49:16 +02:00
private HandleMap index ; // key/seek relation for used records
2009-04-01 14:39:11 +02:00
private final File heapFileTMP ; // the temporary file of the heap during writing
private final File heapFileREADY ; // the final file of the heap when the file is closed
private DataOutputStream os ; // the output stream where the BLOB is written
private long seek ; // the current write position
2009-03-18 17:14:31 +01:00
//private HashSet<String> doublecheck;// only for testing
2009-01-01 23:31:16 +01:00
/ *
* This class implements a BLOB management based on a sequence of records
* The data structure is :
* file : = = record *
* record : = = reclen key blob
* reclen : = = < 4 byte integer = = length of key and blob >
* key : = = < bytes as defined with keylen , if first byte is zero then record is empty >
* blob : = = < bytes of length reclen - keylen >
* that means that each record has the size reclen + 4
*
* Because the blob sizes are stored with integers , one entry may not exceed 2GB
*
* With this class a BLOB file can only be written .
* To read them , use a kelondroBLOBHeapReader .
* A BLOBHeap can be also read and write in random access mode with kelondroBLOBHeap .
* /
/ * *
* create a heap file : a arbitrary number of BLOBs , indexed by an access key
* The heap file will be indexed upon initialization .
2009-04-01 14:39:11 +02:00
* @param temporaryHeapFile
* @param readyHeapFile
2009-01-01 23:31:16 +01:00
* @param keylength
* @param ordering
* @throws IOException
* /
2009-05-04 10:29:44 +02:00
public HeapWriter ( final File temporaryHeapFile , final File readyHeapFile , final int keylength , final ByteOrder ordering , int outBuffer ) throws IOException {
2009-04-01 14:39:11 +02:00
this . heapFileTMP = temporaryHeapFile ;
this . heapFileREADY = readyHeapFile ;
2009-01-01 23:31:16 +01:00
this . keylength = keylength ;
2009-06-07 23:48:01 +02:00
this . index = new HandleMap ( keylength , ordering , 8 , 10 , 100000 ) ;
2009-05-04 10:29:44 +02:00
this . os = new DataOutputStream ( new BufferedOutputStream ( new FileOutputStream ( temporaryHeapFile ) , outBuffer ) ) ;
2009-03-18 17:14:31 +01:00
//this.doublecheck = new HashSet<String>();
2009-01-01 23:31:16 +01:00
this . seek = 0 ;
}
/ * *
* add a BLOB to the heap : this adds the blob always to the end of the file
* newly added heap entries must have keys that have not been added before
* @param key
* @param blob
* @throws IOException
2009-12-10 00:27:26 +01:00
* @throws RowSpaceExceededException
* @throws RowSpaceExceededException
2009-01-01 23:31:16 +01:00
* /
2009-12-10 00:27:26 +01:00
public synchronized void add ( final byte [ ] key , final byte [ ] blob ) throws IOException , RowSpaceExceededException {
2009-03-18 17:14:31 +01:00
//System.out.println("HeapWriter.add: " + new String(key));
2009-01-01 23:31:16 +01:00
assert blob . length > 0 ;
assert key . length = = this . keylength ;
assert index . row ( ) . primaryKeyLength = = key . length : index . row ( ) . primaryKeyLength + " != " + key . length ;
2009-04-01 14:39:11 +02:00
assert index . get ( key ) < 0 : " index.get(key) = " + index . get ( key ) + " , index.size() = " + index . size ( ) + " , file.length() = " + this . heapFileTMP . length ( ) + " , key = " + new String ( key ) ; // must not occur before
2009-01-01 23:31:16 +01:00
if ( ( blob = = null ) | | ( blob . length = = 0 ) ) return ;
2009-12-10 00:27:26 +01:00
index . putUnique ( key , seek ) ;
2009-01-01 23:31:16 +01:00
int chunkl = key . length + blob . length ;
os . writeInt ( chunkl ) ;
os . write ( key ) ;
os . write ( blob ) ;
2009-03-18 17:14:31 +01:00
//assert (this.doublecheck.add(new String(key))) : "doublecheck failed for " + new String(key);
2009-01-01 23:31:16 +01:00
this . seek + = chunkl + 4 ;
}
2009-04-01 14:39:11 +02:00
protected static File fingerprintIndexFile ( File f , String fingerprint ) {
2009-03-30 23:13:53 +02:00
assert f ! = null ;
2009-04-01 14:39:11 +02:00
return new File ( f . getParentFile ( ) , f . getName ( ) + " . " + fingerprint + " .idx " ) ;
2009-01-01 23:31:16 +01:00
}
2009-04-01 14:39:11 +02:00
protected static File fingerprintGapFile ( File f , String fingerprint ) {
2009-03-30 23:13:53 +02:00
assert f ! = null ;
2009-04-01 14:39:11 +02:00
return new File ( f . getParentFile ( ) , f . getName ( ) + " . " + fingerprint + " .gap " ) ;
2009-01-01 23:31:16 +01:00
}
protected static String fingerprintFileHash ( File f ) {
2009-03-30 23:13:53 +02:00
assert f ! = null ;
2009-04-01 14:39:11 +02:00
assert f . exists ( ) : " file = " + f . toString ( ) ;
2009-03-31 18:49:02 +02:00
String fp = Digest . fastFingerprintB64 ( f , false ) ;
assert fp ! = null : " file = " + f . toString ( ) ;
2009-05-28 12:08:36 +02:00
if ( fp = = null ) return null ;
2009-03-31 18:49:02 +02:00
return fp . substring ( 0 , 12 ) ;
2009-01-01 23:31:16 +01:00
}
public static void deleteAllFingerprints ( File f ) {
File d = f . getParentFile ( ) ;
String n = f . getName ( ) ;
String [ ] l = d . list ( ) ;
for ( int i = 0 ; i < l . length ; i + + ) {
2009-05-27 17:04:04 +02:00
if ( l [ i ] . startsWith ( n ) & & ( l [ i ] . endsWith ( " .idx " ) | | l [ i ] . endsWith ( " .gap " ) | | l [ i ] . endsWith ( " .idx.gz " ) | | l [ i ] . endsWith ( " .gap.gz " ) ) ) FileUtils . deletedelete ( new File ( d , l [ i ] ) ) ;
2009-01-01 23:31:16 +01:00
}
}
/ * *
* close the BLOB table
* @throws
* /
2009-04-02 14:24:15 +02:00
public synchronized void close ( boolean writeIDX ) throws IOException {
2009-04-01 14:39:11 +02:00
// close the file
2009-04-02 14:24:15 +02:00
os . flush ( ) ;
os . close ( ) ;
2009-01-01 23:31:16 +01:00
os = null ;
2009-04-01 14:39:11 +02:00
// rename the file into final name
2009-04-02 14:24:15 +02:00
if ( this . heapFileREADY . exists ( ) ) FileUtils . deletedelete ( this . heapFileREADY ) ;
boolean renameok = this . heapFileTMP . renameTo ( this . heapFileREADY ) ;
if ( ! renameok ) throw new IOException ( " cannot rename " + this . heapFileTMP + " to " + this . heapFileREADY ) ;
if ( ! this . heapFileREADY . exists ( ) ) throw new IOException ( " renaming of " + this . heapFileREADY . toString ( ) + " failed: files still exists " ) ;
if ( this . heapFileTMP . exists ( ) ) throw new IOException ( " renaming to " + this . heapFileTMP . toString ( ) + " failed: file does not exist " ) ;
2009-04-01 14:39:11 +02:00
// generate index and gap files
2009-03-18 17:14:31 +01:00
if ( writeIDX & & index . size ( ) > 3 ) {
2009-01-01 23:31:16 +01:00
// now we can create a dump of the index and the gap information
// to speed up the next start
2009-06-19 19:02:50 +02:00
long start = System . currentTimeMillis ( ) ;
String fingerprint = HeapWriter . fingerprintFileHash ( this . heapFileREADY ) ;
if ( fingerprint = = null ) {
Log . logSevere ( " kelondroBLOBHeapWriter " , " cannot write a dump for " + heapFileREADY . getName ( ) + " : fingerprint is null " ) ;
} else {
new Gap ( ) . dump ( fingerprintGapFile ( this . heapFileREADY , fingerprint ) ) ;
index . dump ( fingerprintIndexFile ( this . heapFileREADY , fingerprint ) ) ;
Log . logInfo ( " kelondroBLOBHeapWriter " , " wrote a dump for the " + this . index . size ( ) + " index entries of " + heapFileREADY . getName ( ) + " in " + ( System . currentTimeMillis ( ) - start ) + " milliseconds. " ) ;
2009-01-01 23:31:16 +01:00
}
2009-06-19 19:02:50 +02:00
index . close ( ) ;
index = null ;
2009-01-01 23:31:16 +01:00
} else {
// this is small.. just free resources, do not write index
index . close ( ) ;
index = null ;
}
}
2009-04-01 14:39:11 +02:00
2009-01-01 23:31:16 +01:00
}