yacy_search_server/source/de/anomic/kelondro/kelondroEcoFS.java
2008-01-25 11:44:27 +00:00

504 lines
19 KiB
Java

// kelondroEcoFS.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
/**
 * The EcoFS is a flat file with records of fixed length.
 *
 * <p>The file does not contain any meta information and the first record starts
 * right at file position 0. The access rules are designed so that a minimum of
 * IO operations is necessary. Two in-memory areas mirror content of the file:
 * <ul>
 * <li>a <b>read cache</b> that holds a window of consecutive records of the file;
 *     the window moves whenever a record outside of it is requested;</li>
 * <li>a <b>write buffer</b> that always sits at the end of the file. It contains
 *     only records that have never been written to the file before. When the
 *     write buffer is flushed, the file grows.</li>
 * </ul>
 *
 * <p>The record file may also shrink when the last entry is removed. Removal of
 * entries inside the file is not possible, but such entries can be erased by
 * overwriting them with zero bytes ("cleaning"). Whenever the last record is
 * removed, all directly preceding all-zero records are removed as well.
 *
 * <p>All access is made with byte[] that are generated outside of this class;
 * record content is copied from and to these arrays.
 *
 * <p>All public methods that touch the file are synchronized on this object.
 */
public class kelondroEcoFS {

    private RandomAccessFile raf;
    private final File tablefile;
    protected int recordsize;  // number of bytes in one record
    private long cacheindex;   // file index of the first record held in the read cache
    private int cachecount;    // number of valid records in the read cache
    private int buffercount;   // number of records waiting in the write buffer
    private byte[] cache, buffer;
    private final byte[] zero; // one record consisting of zero bytes only
    private static final int maxBuffer = 4 * 1024; // stay below hard disc cache (is that necessary?)

    /**
     * Opens a record file, creating it if it does not exist yet.
     *
     * @param tablefile  the file that holds the records
     * @param recordsize number of bytes of one record; must be positive
     * @throws IllegalArgumentException if recordsize is not positive
     * @throws IOException if the file cannot be created, opened or read
     */
    public kelondroEcoFS(File tablefile, int recordsize) throws IOException {
        if (recordsize <= 0) {
            throw new IllegalArgumentException("kelondroEcoFS: recordsize must be positive, but is " + recordsize);
        }
        this.tablefile = tablefile;
        this.recordsize = recordsize;

        // a freshly allocated array already consists of zero bytes only
        this.zero = new byte[recordsize];

        // mode "rw" creates the file if it does not exist; a failure to do so
        // is reported to the caller instead of leaving this object half-initialized
        this.raf = new RandomAccessFile(tablefile, "rw");

        // initialize cache and buffer; both hold at least one record
        int maxrecords = Math.max(1, maxBuffer / recordsize);
        this.cache = new byte[maxrecords * recordsize];
        this.buffer = new byte[maxrecords * recordsize];
        this.buffercount = 0;

        // first-time read of cache
        try {
            fillCache(0);
        } catch (IOException e) {
            // do not leak the file handle of an object that is never handed out
            try {
                this.raf.close();
            } catch (IOException ignored) {
                // the read failure is the error that is reported
            }
            throw e;
        }
    }

    /**
     * Computes the number of records of a table file without opening it.
     *
     * @param tablefile  the table file
     * @param recordsize number of bytes of one record
     * @return the number of records in the file, 0 if the file does not exist
     */
    public static long tableSize(File tablefile, long recordsize) {
        if (!tablefile.exists()) return 0;
        long size = tablefile.length();
        assert size % recordsize == 0;
        return size / recordsize;
    }

    /**
     * @return the number of records in the file plus the number of records in the write buffer
     * @throws IOException if the file length cannot be determined
     */
    public synchronized long size() throws IOException {
        return filesize() + (long) this.buffercount;
    }

    /**
     * @return the file that was handed over to the constructor
     */
    public File filename() {
        return this.tablefile;
    }

    // number of complete records that are stored in the file (write buffer not counted)
    private long filesize() throws IOException {
        return raf.length() / (long) recordsize;
    }

    // returns the record offset inside the read cache if the index is inside
    // the cache window; returns -1 if the index is not in the cache
    private int inCache(long index) {
        if ((index >= this.cacheindex) && (index < this.cacheindex + this.cachecount)) {
            return (int) (index - this.cacheindex);
        }
        return -1;
    }

    // returns the record offset inside the write buffer if the index is inside
    // the buffer; returns -1 if the index is not in the buffer
    private int inBuffer(long index) throws IOException {
        long fs = filesize();
        if ((index >= fs) && (index < fs + this.buffercount)) {
            return (int) (index - fs);
        }
        return -1;
    }

    // loads the read cache with a copy of file content, starting with the record at index;
    // if the window would reach beyond the end of the file, its start is shifted
    // towards the beginning of the file until it fits
    private void fillCache(long index) throws IOException {
        // callers request a fill only for records that are not cached
        assert inCache(index) < 0;
        if (inCache(index) >= 0) return;

        // calculate new start position
        final int capacity = this.cache.length / this.recordsize;
        final long fs = this.filesize();
        if (index + capacity > fs) index = fs - capacity;
        if (index < 0) index = 0;

        // calculate number of records that shall be stored in the cache
        final int count = (int) Math.min(capacity, fs - index);
        assert count >= 0;

        // the window is declared empty until the read has succeeded, so that a
        // failed read never leaves a window that points to partly overwritten content
        this.cacheindex = index;
        this.cachecount = 0;
        if (count == 0) return;

        // copy records from file to cache; readFully is required because
        // read() is allowed to deliver fewer bytes than requested
        raf.seek((long) this.recordsize * index);
        raf.readFully(this.cache, 0, this.recordsize * count);
        this.cachecount = count;
    }

    // appends the content of the write buffer to the file; the buffer is
    // emptied only if the write succeeded, so no record is dropped silently
    private void flushBuffer() throws IOException {
        if (this.buffercount == 0) return;
        raf.seek(raf.length());
        raf.write(this.buffer, 0, this.recordsize * this.buffercount);
        this.buffercount = 0;
    }

    // overwrites the record at the given index directly in the file
    private void writeToFile(long index, byte[] b, int start) throws IOException {
        raf.seek(index * (long) this.recordsize);
        raf.write(b, start, this.recordsize);
    }

    /**
     * Flushes the write buffer and closes the file. Errors are printed, not thrown.
     * Calling this method on an already closed object has no effect.
     */
    public synchronized void close() {
        if (raf == null) return; // already closed
        try {
            flushBuffer();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // close the file even if the flush failed
        try {
            raf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
        raf = null;
        buffer = null;
        cache = null;
    }

    /**
     * Copies the record at the given index into b.
     *
     * @param index index of the record, 0 &lt;= index &lt; size()
     * @param b     target array; must provide recordsize bytes beginning at start
     * @param start position in b where the record is written
     * @throws IndexOutOfBoundsException if the index does not address an existing record
     * @throws IOException if the file cannot be read
     */
    public synchronized void get(long index, byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
        if (index < 0 || index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.get(" + index + ") outside bounds (" + this.size() + ")");

        int p = inCache(index);
        if (p < 0) {
            int q = inBuffer(index);
            if (q >= 0) {
                // read entry from the buffer
                System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize);
                return;
            }
            // the index is outside of cache and buffer. shift cache window
            fillCache(index);
            p = inCache(index);
            assert p >= 0;
        }
        // read entry from the cache
        System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
    }

    /**
     * Writes a record at the given index. With index == size() the record is appended.
     *
     * <p>A record that consists of zero bytes only is handed over to
     * {@link #clean(long)}. Because clean accepts existing records only, an
     * attempt to append an all-zero record raises an IndexOutOfBoundsException.
     *
     * @param index index of the record, 0 &lt;= index &lt;= size()
     * @param b     source array; must provide recordsize bytes beginning at start
     * @param start position in b where the record begins
     * @throws IndexOutOfBoundsException if the index is negative or greater than size()
     * @throws IOException if the file cannot be written
     */
    public synchronized void put(long index, byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
        if (index < 0 || index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");

        // check if this is an empty entry
        if (isClean(b, start, this.recordsize)) {
            clean(index);
            return;
        }

        int p = inCache(index);
        if (p >= 0) {
            // write entry to the file first and then to the cache, so that the
            // cache never shows content that failed to reach the file
            writeToFile(index, b, start);
            System.arraycopy(b, start, this.cache, p * this.recordsize, this.recordsize);
            return;
        }

        int q = inBuffer(index);
        if (q >= 0) {
            // write entry to the buffer
            System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize);
            return;
        }

        if (index == size()) {
            // append the record; if the buffer is full, it is written to the file first
            if (this.buffercount >= this.buffer.length / this.recordsize) flushBuffer();
            System.arraycopy(b, start, this.buffer, this.buffercount * this.recordsize, this.recordsize);
            this.buffercount++;
            assert this.buffercount <= this.buffer.length / this.recordsize;
        } else {
            // the record is inside the file but outside of the cache window:
            // write it directly to the file
            writeToFile(index, b, start);
        }
    }

    /**
     * Appends a record to the end of the table; same as put(size(), b, start).
     *
     * @param b     source array; must provide recordsize bytes beginning at start
     * @param start position in b where the record begins
     * @throws IOException if the file cannot be written
     */
    public synchronized void add(byte[] b, int start) throws IOException {
        put(size(), b, start);
    }

    // true if all bytes in b[offset .. offset + length - 1] are zero
    private boolean isClean(byte[] b, int offset, int length) {
        for (int i = 0; i < length; i++) {
            if (b[i + offset] != 0) return false;
        }
        return true;
    }

    // true if the record at the given index consists of zero bytes only;
    // may shift the cache window
    private boolean isClean(long index) throws IOException {
        assert index < size();
        int p = inCache(index);
        if (p < 0) {
            int q = inBuffer(index);
            if (q >= 0) {
                // check entry from the buffer
                return isClean(this.buffer, q * this.recordsize, this.recordsize);
            }
            // the index is outside of cache and buffer. shift cache window
            fillCache(index);
            p = inCache(index);
            assert p >= 0;
        }
        // check entry from the cache
        return isClean(this.cache, p * this.recordsize, this.recordsize);
    }

    /**
     * Removes an entry by cleaning (writing zero bytes). The entry that had been
     * at that place before is copied to the given array b. If the last entry is
     * cleaned, the table shrinks instead.
     *
     * @param index index of the record, 0 &lt;= index &lt; size()
     * @param b     target array for the old content; must provide recordsize bytes beginning at start
     * @param start position in b where the old record is written
     * @throws IndexOutOfBoundsException if the index does not address an existing record
     * @throws IOException if the file cannot be read or written
     */
    public synchronized void clean(long index, byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
        if (index < 0 || index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
        // fetch the old content; this leaves the record in the cache or the buffer,
        // then erase it (including the shrink check for the last record)
        get(index, b, start);
        clean(index);
    }

    /**
     * Removes an entry by cleaning (writing zero bytes). If the last entry is
     * cleaned, the table shrinks instead.
     *
     * @param index index of the record, 0 &lt;= index &lt; size()
     * @throws IndexOutOfBoundsException if the index does not address an existing record
     * @throws IOException if the file cannot be written
     */
    public synchronized void clean(long index) throws IOException {
        if (index < 0 || index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
        if (index == size() - 1) {
            cleanLast();
            return;
        }

        int p = inCache(index);
        if (p >= 0) {
            // write zero bytes to the file and to the cache
            writeToFile(index, zero, 0);
            System.arraycopy(zero, 0, this.cache, p * this.recordsize, this.recordsize);
            return;
        }

        int q = inBuffer(index);
        if (q >= 0) {
            // write zero to the buffer
            System.arraycopy(zero, 0, this.buffer, q * this.recordsize, this.recordsize);
            return;
        }

        // the record is inside the file but outside of the cache window
        writeToFile(index, zero, 0);
    }

    /**
     * Removes the last record and copies its content to b. All all-zero records
     * that directly precede it are removed as well.
     *
     * @param b     target array for the removed record; must provide recordsize bytes beginning at start
     * @param start position in b where the removed record is written
     * @throws IndexOutOfBoundsException if the table is empty
     * @throws IOException if the file cannot be read or truncated
     */
    public synchronized void cleanLast(byte[] b, int start) throws IOException {
        if (size() == 0) throw new IndexOutOfBoundsException("kelondroEcoFS.cleanLast() on empty table");
        cleanLast0(b, start);
        long i;
        while (((i = size()) > 0) && (isClean(i - 1))) {
            cleanLast0();
        }
    }

    // copies the last record to b and removes it; the table must not be empty
    private synchronized void cleanLast0(byte[] b, int start) throws IOException {
        assert b.length - start >= this.recordsize;
        get(this.size() - 1, b, start);
        cleanLast0();
    }

    /**
     * Removes the last record. All all-zero records that directly precede it
     * are removed as well.
     *
     * @throws IndexOutOfBoundsException if the table is empty
     * @throws IOException if the file cannot be read or truncated
     */
    public synchronized void cleanLast() throws IOException {
        if (size() == 0) throw new IndexOutOfBoundsException("kelondroEcoFS.cleanLast() on empty table");
        cleanLast0();
        long i;
        while (((i = size()) > 0) && (isClean(i - 1))) {
            cleanLast0();
        }
    }

    // removes the last record without looking at its content; the table must not be empty
    private synchronized void cleanLast0() throws IOException {
        if (this.buffercount > 0) {
            // the last record was never written to the file: shrink buffer
            this.buffercount--;
            return;
        }
        // the last record is in the file: shrink the file, and the cache
        // window as well if the record is part of it
        final long last = this.size() - 1;
        final boolean cached = inCache(last) >= 0;
        this.raf.setLength(last * (long) this.recordsize);
        if (cached) this.cachecount--;
    }

    /**
     * Small self-test: creates a table in the file named by the first argument
     * (an existing file is deleted), performs some operations and prints results.
     *
     * @param args args[0] is the path of the test file
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("usage: kelondroEcoFS <tablefile>");
            return;
        }
        File f = new File(args[0]);
        if (f.exists()) f.delete();
        kelondroEcoFS t = null;
        try {
            t = new kelondroEcoFS(f, 8);
            byte[] b = new byte[8];
            t.add("01234567".getBytes(), 0);
            t.add("ABCDEFGH".getBytes(), 0);
            t.add("abcdefgh".getBytes(), 0);
            t.add("--------".getBytes(), 0);
            t.add("********".getBytes(), 0);
            for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0);
            t.add("=======0".getBytes(), 0);
            t.add("=======1".getBytes(), 0);
            t.add("=======2".getBytes(), 0);
            t.cleanLast(b, 0);
            System.out.println(new String(b));
            t.clean(2, b, 0);
            System.out.println(new String(b));
            t.get(1, b, 0);
            System.out.println(new String(b));
            t.put(1, "AbCdEfGh".getBytes(), 0);
            t.get(1, b, 0);
            System.out.println(new String(b));
            t.get(3, b, 0);
            System.out.println(new String(b));
            t.get(4, b, 0);
            System.out.println(new String(b));
            System.out.println("size = " + t.size());
            t.clean(t.size() - 2);
            t.cleanLast();

            // measure the average duration of one size() call
            final int rounds = 100000;
            long start = System.nanoTime();
            long c = 0;
            for (int i = 0; i < rounds; i++) {
                c = t.size();
            }
            System.out.println("size() needs " + ((System.nanoTime() - start) / rounds) + " nanoseconds");
            System.out.println("size = " + c);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (t != null) t.close();
        }
    }
}