mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- removed all InputStream.available() calls because available() does not work for files > 2GB - iterators terminate when an IOException occurs - added handling of non-executing index.add methods to enhance assert usage - added index for file indexes > 2GB, to be used in new indexHeap git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4666 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
94d3d3a86f
commit
696b8ee3f5
|
@ -45,7 +45,7 @@ import java.util.TreeMap;
|
|||
import de.anomic.kelondro.kelondroBase64Order;
|
||||
import de.anomic.kelondro.kelondroBufferedRA;
|
||||
import de.anomic.kelondro.kelondroByteOrder;
|
||||
import de.anomic.kelondro.kelondroBytesIntMap;
|
||||
import de.anomic.kelondro.kelondroBytesLongMap;
|
||||
import de.anomic.kelondro.kelondroCloneableIterator;
|
||||
import de.anomic.kelondro.kelondroException;
|
||||
import de.anomic.kelondro.kelondroFixedWidthArray;
|
||||
|
@ -60,7 +60,7 @@ public final class indexContainerHeap {
|
|||
|
||||
private kelondroRow payloadrow;
|
||||
private serverLog log;
|
||||
private kelondroBytesIntMap index;
|
||||
private kelondroBytesLongMap index;
|
||||
private SortedMap<String, indexContainer> cache;
|
||||
private File backupFile;
|
||||
private boolean readOnlyMode;
|
||||
|
@ -145,27 +145,36 @@ public final class indexContainerHeap {
|
|||
if (log != null) log.logInfo("creating index for rwi heap '" + heapFile.getName() + "'");
|
||||
|
||||
long start = System.currentTimeMillis();
|
||||
this.index = new kelondroBytesIntMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0);
|
||||
this.index = new kelondroBytesLongMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0);
|
||||
DataInputStream is = null;
|
||||
long urlCount = 0;
|
||||
String wordHash;
|
||||
byte[] word = new byte[payloadrow.primaryKeyLength];
|
||||
int seek = 0, seek0;
|
||||
long seek = 0, seek0;
|
||||
synchronized (index) {
|
||||
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
|
||||
|
||||
while (is.available() > 0) {
|
||||
// dont test available() here because this does not work for files > 2GB
|
||||
loop: while (true) {
|
||||
// remember seek position
|
||||
seek0 = seek;
|
||||
|
||||
// read word
|
||||
is.readFully(word);
|
||||
try {
|
||||
is.readFully(word);
|
||||
} catch (IOException e) {
|
||||
break loop; // terminate loop
|
||||
}
|
||||
wordHash = new String(word);
|
||||
seek += wordHash.length();
|
||||
seek += (long) wordHash.length();
|
||||
|
||||
// read collection
|
||||
seek += kelondroRowSet.skipNextRowSet(is, payloadrow);
|
||||
index.addi(word, seek0);
|
||||
try {
|
||||
seek += (long) kelondroRowSet.skipNextRowSet(is, payloadrow);
|
||||
} catch (IOException e) {
|
||||
break loop; // terminate loop
|
||||
}
|
||||
index.addl(word, seek0);
|
||||
}
|
||||
}
|
||||
is.close();
|
||||
|
@ -222,23 +231,21 @@ public final class indexContainerHeap {
|
|||
DataInputStream is;
|
||||
byte[] word;
|
||||
kelondroRow payloadrow;
|
||||
indexContainer nextContainer;
|
||||
|
||||
public heapFileEntries(File heapFile, kelondroRow payloadrow) throws IOException {
|
||||
if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist");
|
||||
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
|
||||
word = new byte[payloadrow.primaryKeyLength];
|
||||
this.payloadrow = payloadrow;
|
||||
this.nextContainer = next0();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
try {
|
||||
return is.available() > 0;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
}
|
||||
return this.nextContainer != null;
|
||||
}
|
||||
|
||||
public indexContainer next() {
|
||||
private indexContainer next0() {
|
||||
try {
|
||||
is.readFully(word);
|
||||
return new indexContainer(new String(word), kelondroRowSet.importRowSet(is, payloadrow));
|
||||
|
@ -246,6 +253,12 @@ public final class indexContainerHeap {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public indexContainer next() {
|
||||
indexContainer n = this.nextContainer;
|
||||
this.nextContainer = next0();
|
||||
return n;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("heap dumps are read-only");
|
||||
|
@ -340,7 +353,7 @@ public final class indexContainerHeap {
|
|||
|
||||
// check if the index contains the key
|
||||
try {
|
||||
return index.geti(key.getBytes()) >= 0;
|
||||
return index.getl(key.getBytes()) >= 0;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
|
@ -361,7 +374,7 @@ public final class indexContainerHeap {
|
|||
assert index.row().primaryKeyLength == key.length();
|
||||
|
||||
// check if the index contains the key
|
||||
int pos = index.geti(key.getBytes());
|
||||
long pos = index.getl(key.getBytes());
|
||||
if (pos < 0) return null;
|
||||
|
||||
// access the file and read the container
|
||||
|
|
|
@ -67,13 +67,13 @@ public class kelondroBytesIntMap {
|
|||
return (int) oldentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized void addi(byte[] key, int i) throws IOException {
|
||||
public synchronized boolean addi(byte[] key, int i) throws IOException {
|
||||
assert i >= 0 : "i = " + i;
|
||||
assert (key != null);
|
||||
kelondroRow.Entry newentry = this.rowdef.newEntry();
|
||||
newentry.setCol(0, key);
|
||||
newentry.setCol(1, i);
|
||||
index.addUnique(newentry);
|
||||
return index.addUnique(newentry);
|
||||
}
|
||||
|
||||
public synchronized ArrayList<Integer[]> removeDoubles() throws IOException {
|
||||
|
|
131
source/de/anomic/kelondro/kelondroBytesLongMap.java
Normal file
131
source/de/anomic/kelondro/kelondroBytesLongMap.java
Normal file
|
@ -0,0 +1,131 @@
|
|||
// kelondroBytesLongMap.java
|
||||
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 08.04.2008 on http://yacy.net
|
||||
//
|
||||
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
|
||||
// $LastChangedRevision: 1986 $
|
||||
// $LastChangedBy: orbiter $
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.kelondro;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
|
||||
public class kelondroBytesLongMap {
|
||||
|
||||
private kelondroRow rowdef;
|
||||
private kelondroIndex index;
|
||||
|
||||
public kelondroBytesLongMap(kelondroIndex ki) {
|
||||
assert (ki.row().columns() == 2); // must be a key/index relation
|
||||
assert (ki.row().width(1) == 8); // the value must be a b256-encoded int, 4 bytes long
|
||||
this.index = ki;
|
||||
this.rowdef = ki.row();
|
||||
}
|
||||
|
||||
public kelondroBytesLongMap(int keylength, kelondroByteOrder objectOrder, int space) {
|
||||
this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-8 {b256}")}, objectOrder, 0);
|
||||
this.index = new kelondroRAMIndex(rowdef, space);
|
||||
}
|
||||
|
||||
public kelondroRow row() {
|
||||
return index.row();
|
||||
}
|
||||
|
||||
public synchronized long getl(byte[] key) throws IOException {
|
||||
assert (key != null);
|
||||
kelondroRow.Entry indexentry = index.get(key);
|
||||
if (indexentry == null) return -1;
|
||||
return indexentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized long putl(byte[] key, long l) throws IOException {
|
||||
assert l >= 0 : "l = " + l;
|
||||
assert (key != null);
|
||||
kelondroRow.Entry newentry = index.row().newEntry();
|
||||
newentry.setCol(0, key);
|
||||
newentry.setCol(1, l);
|
||||
kelondroRow.Entry oldentry = index.put(newentry);
|
||||
if (oldentry == null) return -1;
|
||||
return oldentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized boolean addl(byte[] key, long l) throws IOException {
|
||||
assert l >= 0 : "l = " + l;
|
||||
assert (key != null);
|
||||
kelondroRow.Entry newentry = this.rowdef.newEntry();
|
||||
newentry.setCol(0, key);
|
||||
newentry.setCol(1, l);
|
||||
return index.addUnique(newentry);
|
||||
}
|
||||
|
||||
public synchronized ArrayList<Long[]> removeDoubles() throws IOException {
|
||||
ArrayList<kelondroRowSet> indexreport = index.removeDoubles();
|
||||
ArrayList<Long[]> report = new ArrayList<Long[]>();
|
||||
Long[] is;
|
||||
Iterator<kelondroRow.Entry> ei;
|
||||
int c;
|
||||
for (kelondroRowSet rowset: indexreport) {
|
||||
is = new Long[rowset.size()];
|
||||
ei = rowset.rows();
|
||||
c = 0;
|
||||
while (ei.hasNext()) {
|
||||
is[c++] = new Long(ei.next().getColLong(1));
|
||||
}
|
||||
report.add(is);
|
||||
}
|
||||
return report;
|
||||
}
|
||||
|
||||
public synchronized long removel(byte[] key) throws IOException {
|
||||
assert (key != null);
|
||||
kelondroRow.Entry indexentry = index.remove(key, true); // keeping the order will prevent multiple re-sorts
|
||||
if (indexentry == null) return -1;
|
||||
return indexentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized long removeonel() throws IOException {
|
||||
kelondroRow.Entry indexentry = index.removeOne();
|
||||
if (indexentry == null) return -1;
|
||||
return indexentry.getColLong(1);
|
||||
}
|
||||
|
||||
public synchronized int size() {
|
||||
return index.size();
|
||||
}
|
||||
|
||||
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
|
||||
return index.keys(up, firstKey);
|
||||
}
|
||||
|
||||
public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(boolean up, byte[] firstKey) throws IOException {
|
||||
return index.rows(up, firstKey);
|
||||
}
|
||||
|
||||
public kelondroProfile profile() {
|
||||
return index.profile();
|
||||
}
|
||||
|
||||
public synchronized void close() {
|
||||
index.close();
|
||||
index = null;
|
||||
}
|
||||
|
||||
}
|
|
@ -294,7 +294,7 @@ public class kelondroCache implements kelondroIndex {
|
|||
throw new UnsupportedOperationException("put with date is inefficient in kelondroCache");
|
||||
}
|
||||
|
||||
public synchronized void addUnique(Entry row) throws IOException {
|
||||
public synchronized boolean addUnique(Entry row) throws IOException {
|
||||
assert (row != null);
|
||||
assert (row.columns() == row().columns());
|
||||
//assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
|
||||
|
@ -307,20 +307,21 @@ public class kelondroCache implements kelondroIndex {
|
|||
this.readMissCache.remove(key, true);
|
||||
this.hasnotDelete++;
|
||||
// the entry does not exist before
|
||||
index.addUnique(row); // write to backend
|
||||
if (readHitCache != null) {
|
||||
boolean added = index.addUnique(row); // write to backend
|
||||
if (added && (readHitCache != null)) {
|
||||
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
|
||||
if (dummy == null) this.writeUnique++; else this.writeDouble++;
|
||||
}
|
||||
return;
|
||||
return added;
|
||||
}
|
||||
|
||||
// the worst case: we must write to the back-end directly
|
||||
index.addUnique(row);
|
||||
if (readHitCache != null) {
|
||||
boolean added = index.addUnique(row);
|
||||
if (added && (readHitCache != null)) {
|
||||
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
|
||||
if (dummy == null) this.writeUnique++; else this.writeDouble++;
|
||||
}
|
||||
return added;
|
||||
}
|
||||
|
||||
public synchronized void addUnique(Entry row, Date entryDate) throws IOException {
|
||||
|
@ -349,9 +350,13 @@ public class kelondroCache implements kelondroIndex {
|
|||
}
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<Entry> rows) throws IOException {
|
||||
public synchronized int addUniqueMultiple(List<Entry> rows) throws IOException {
|
||||
Iterator<Entry> i = rows.iterator();
|
||||
while (i.hasNext()) addUnique((Entry) i.next());
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
if (addUnique((Entry) i.next())) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
|
||||
|
|
|
@ -34,8 +34,6 @@ import java.io.IOException;
|
|||
import java.io.RandomAccessFile;
|
||||
import java.util.Iterator;
|
||||
|
||||
import de.anomic.server.logging.serverLog;
|
||||
|
||||
/**
|
||||
* The EcoFS is a flat file with records of fixed length. The file does not contain
|
||||
* any meta information and the first record starts right at file position 0
|
||||
|
@ -530,10 +528,9 @@ public class kelondroEcoFS {
|
|||
|
||||
public static class ChunkIterator implements Iterator<byte[]> {
|
||||
|
||||
private int recordsize, chunksize, chunkcounter;
|
||||
private int recordsize, chunksize;
|
||||
private DataInputStream stream;
|
||||
private serverLog log;
|
||||
private File file;
|
||||
private byte[] nextBytes;
|
||||
|
||||
/**
|
||||
* create a ChunkIterator
|
||||
|
@ -544,27 +541,20 @@ public class kelondroEcoFS {
|
|||
* @param chunksize: the size of the chunks that are returned by next(). remaining bytes until the length of recordsize are skipped
|
||||
* @throws FileNotFoundException
|
||||
*/
|
||||
public ChunkIterator(File file, int recordsize, int chunksize, serverLog log) throws FileNotFoundException {
|
||||
public ChunkIterator(File file, int recordsize, int chunksize) throws FileNotFoundException {
|
||||
assert (file.exists());
|
||||
assert file.length() % recordsize == 0;
|
||||
this.recordsize = recordsize;
|
||||
this.chunksize = chunksize;
|
||||
this.chunkcounter = 0; // only for logging
|
||||
this.stream = new DataInputStream(new BufferedInputStream(new FileInputStream(file), 64 * 1024));
|
||||
this.log = log;
|
||||
this.file = file;
|
||||
this.nextBytes = next0();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
try {
|
||||
return stream != null && stream.available() > 0;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
return false;
|
||||
}
|
||||
return nextBytes != null;
|
||||
}
|
||||
|
||||
public byte[] next() {
|
||||
public byte[] next0() {
|
||||
byte[] chunk = new byte[chunksize];
|
||||
int r, s;
|
||||
try {
|
||||
|
@ -579,16 +569,16 @@ public class kelondroEcoFS {
|
|||
}
|
||||
return chunk;
|
||||
} catch (IOException e) {
|
||||
if (log == null) {
|
||||
serverLog.logWarning("kelondroEcoFS", "ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e);
|
||||
} else {
|
||||
log.logWarning("ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e);
|
||||
}
|
||||
this.stream = null;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] next() {
|
||||
byte[] n = this.nextBytes;
|
||||
this.nextBytes = next0();
|
||||
return n;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
|
|
@ -65,6 +65,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
kelondroBytesIntMap index;
|
||||
kelondroBufferedEcoFS file;
|
||||
kelondroRow rowdef;
|
||||
int fail;
|
||||
|
||||
kelondroRow taildef;
|
||||
private int buffersize;
|
||||
|
@ -72,6 +73,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
public kelondroEcoTable(File tablefile, kelondroRow rowdef, int useTailCache, int buffersize, int initialSpace) {
|
||||
this.rowdef = rowdef;
|
||||
this.buffersize = buffersize;
|
||||
this.fail = 0;
|
||||
assert rowdef.primaryKeyIndex == 0;
|
||||
// define the taildef, a row like the rowdef but without the first column
|
||||
kelondroColumn[] cols = new kelondroColumn[rowdef.columns() - 1];
|
||||
|
@ -129,8 +131,9 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
// write the key into the index table
|
||||
assert key != null;
|
||||
if (key == null) {i++; continue;}
|
||||
index.addi(key, i++);
|
||||
|
||||
if (!index.addi(key, i++)) fail++;
|
||||
assert index.size() + fail == i : "index.size() = " + index.size() + ", i = " + i + ", fail = " + fail + ", key = '" + new String(key) + "'";
|
||||
|
||||
if ((i % 10000) == 0) {
|
||||
System.out.print('.');
|
||||
System.out.flush();
|
||||
|
@ -139,7 +142,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
} else {
|
||||
byte[] record;
|
||||
key = new byte[rowdef.primaryKeyLength];
|
||||
Iterator<byte[]> ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize, null);
|
||||
Iterator<byte[]> ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize);
|
||||
while (ri.hasNext()) {
|
||||
record = ri.next();
|
||||
assert record != null;
|
||||
|
@ -147,8 +150,8 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
|
||||
|
||||
// write the key into the index table
|
||||
index.addi(key, i++);
|
||||
|
||||
if (!index.addi(key, i++)) fail++;
|
||||
|
||||
// write the tail into the table
|
||||
table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true));
|
||||
|
||||
|
@ -164,6 +167,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
System.out.flush();
|
||||
this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize);
|
||||
ArrayList<Integer[]> doubles = index.removeDoubles();
|
||||
assert index.size() + doubles.size() + fail == i;
|
||||
System.out.println(" -removed " + doubles.size() + " doubles- done.");
|
||||
if (doubles.size() > 0) {
|
||||
System.out.println("DEBUG " + tablefile + ": WARNING - EcoTable " + tablefile + " has " + doubles.size() + " doubles");
|
||||
|
@ -174,7 +178,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
for (Integer[] ds: doubles) {
|
||||
file.get(ds[0].longValue(), record, 0);
|
||||
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
|
||||
index.addi(key, ds[0].intValue());
|
||||
if (!index.addi(key, ds[0].intValue())) fail++;
|
||||
}
|
||||
// then remove the other doubles by removing them from the table, but do a re-indexing while doing that
|
||||
// first aggregate all the delete positions because the elements from the top positions must be removed first
|
||||
|
@ -190,6 +194,12 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
removeInFile(top.intValue());
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
assert file.size() == index.size() + doubles.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
// should never happen
|
||||
e.printStackTrace();
|
||||
|
@ -198,11 +208,6 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
e.printStackTrace();
|
||||
throw new kelondroException(e.getMessage());
|
||||
}
|
||||
try {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
// track this table
|
||||
tableTracker.put(tablefile.toString(), this);
|
||||
|
@ -217,7 +222,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
*/
|
||||
public Iterator<byte[]> keyIterator(File file, kelondroRow rowdef) throws FileNotFoundException {
|
||||
assert rowdef.primaryKeyIndex == 0;
|
||||
return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength, null);
|
||||
return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength);
|
||||
}
|
||||
|
||||
public static long tableSize(File tablefile, int recordsize) {
|
||||
|
@ -254,26 +259,30 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
return (int) ((rowdef.primaryKeyLength + 4) * tableSize(f, rowdef.objectsize) * kelondroRowCollection.growfactor);
|
||||
}
|
||||
|
||||
public synchronized void addUnique(Entry row) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
public synchronized boolean addUnique(Entry row) throws IOException {
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
int i = (int) file.size();
|
||||
index.addi(row.getPrimaryKeyBytes(), i);
|
||||
boolean added = index.addi(row.getPrimaryKeyBytes(), i);
|
||||
if (!added) return false;
|
||||
if (table != null) {
|
||||
assert table.size() == i;
|
||||
table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
|
||||
}
|
||||
file.put(i, row.bytes(), 0);
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
return true;
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<Entry> rows) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
public synchronized int addUniqueMultiple(List<Entry> rows) throws IOException {
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
Iterator<Entry> i = rows.iterator();
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
addUnique(i.next());
|
||||
if (addUnique(i.next())) c++;
|
||||
}
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
return c;
|
||||
}
|
||||
|
||||
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
|
||||
|
@ -318,7 +327,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
}
|
||||
|
||||
public synchronized Entry get(byte[] key) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
int i = index.geti(key);
|
||||
if (i == -1) return null;
|
||||
|
@ -334,13 +343,13 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
System.arraycopy(key, 0, b, 0, key.length);
|
||||
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength);
|
||||
}
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
return rowdef.newEntry(b);
|
||||
}
|
||||
|
||||
public synchronized boolean has(byte[] key) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
return index.geti(key) >= 0;
|
||||
}
|
||||
|
@ -354,7 +363,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
}
|
||||
|
||||
public synchronized Entry put(Entry row) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
assert row != null;
|
||||
assert row.bytes() != null;
|
||||
|
@ -381,7 +390,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
table.set(i, taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
|
||||
file.put(i, row.bytes(), 0);
|
||||
}
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
// return old value
|
||||
return rowdef.newEntry(b);
|
||||
|
@ -392,12 +401,12 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
}
|
||||
|
||||
public synchronized void putMultiple(List<Entry> rows) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
Iterator<Entry> i = rows.iterator();
|
||||
while (i.hasNext()) {
|
||||
put(i.next());
|
||||
}
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
}
|
||||
|
||||
private void removeInFile(int i) throws IOException {
|
||||
|
@ -433,7 +442,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
}
|
||||
|
||||
public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
assert keepOrder == false; // this class cannot keep the order during a remove
|
||||
assert key.length == rowdef.primaryKeyLength;
|
||||
|
@ -462,7 +471,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
|
||||
index.puti(k, i);
|
||||
}
|
||||
assert (file.size() == index.size());
|
||||
assert (file.size() == index.size() + fail);
|
||||
} else {
|
||||
// get result value from the table copy, so we don't need to read it from the file
|
||||
kelondroRow.Entry v = table.get(i);
|
||||
|
@ -488,17 +497,17 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
kelondroRow.Entry lr = rowdef.newEntry(p);
|
||||
index.puti(lr.getPrimaryKeyBytes(), i);
|
||||
}
|
||||
assert (file.size() == index.size());
|
||||
assert (file.size() == index.size() + fail);
|
||||
assert (table.size() == index.size()) : "table.size() = " + table.size() + ", index.size() = " + index.size();
|
||||
}
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
assert index.size() + 1 == sb : "index.size() = " + index.size() + ", sb = " + sb;
|
||||
return rowdef.newEntry(b);
|
||||
}
|
||||
|
||||
public synchronized Entry removeOne() throws IOException {
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert ((table == null) || (table.size() == index.size()));
|
||||
byte[] le = new byte[rowdef.objectsize];
|
||||
file.cleanLast(le, 0);
|
||||
|
@ -506,7 +515,7 @@ public class kelondroEcoTable implements kelondroIndex {
|
|||
int i = index.removei(lr.getPrimaryKeyBytes());
|
||||
assert i >= 0;
|
||||
if (table != null) table.removeOne();
|
||||
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
|
||||
return lr;
|
||||
}
|
||||
|
||||
|
|
|
@ -299,13 +299,13 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
|||
return oldentry;
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
|
||||
assert row.objectsize() == this.rowdef.objectsize;
|
||||
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
|
||||
index.addi(row.getColBytes(0), super.add(row));
|
||||
return index.addi(row.getColBytes(0), super.add(row));
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
// add a list of entries in a ordered way.
|
||||
// this should save R/W head positioning time
|
||||
TreeMap<Integer, byte[]> indexed_result = super.addMultiple(rows);
|
||||
|
@ -318,7 +318,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
|
|||
index.puti(entry.getValue(), entry.getKey().intValue());
|
||||
}
|
||||
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
|
||||
|
||||
return indexed_result.size();
|
||||
}
|
||||
|
||||
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
|
||||
|
|
|
@ -66,8 +66,8 @@ public interface kelondroIndex {
|
|||
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
|
||||
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
|
||||
public void putMultiple(List<kelondroRow.Entry> rows) throws IOException; // for R/W head path optimization
|
||||
public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check
|
||||
public void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException; // no double-check
|
||||
public boolean addUnique(kelondroRow.Entry row) throws IOException; // no double-check
|
||||
public int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException; // no double-check
|
||||
public ArrayList<kelondroRowSet> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
|
||||
public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException;
|
||||
public kelondroRow.Entry removeOne() throws IOException;
|
||||
|
|
|
@ -105,22 +105,24 @@ public class kelondroRAMIndex implements kelondroIndex {
|
|||
}
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry entry) {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry entry) {
|
||||
assert (entry != null);
|
||||
if (index1 == null) {
|
||||
// we are in the initialization phase
|
||||
index0.addUnique(entry);
|
||||
return index0.addUnique(entry);
|
||||
} else {
|
||||
// initialization is over, add to secondary index
|
||||
index1.addUnique(entry);
|
||||
return index1.addUnique(entry);
|
||||
}
|
||||
}
|
||||
|
||||
public void addUniqueMultiple(List<Entry> rows) {
|
||||
public int addUniqueMultiple(List<Entry> rows) {
|
||||
Iterator<Entry> i = rows.iterator();
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
addUnique(i.next());
|
||||
if (addUnique(i.next())) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public synchronized ArrayList<kelondroRowSet> removeDoubles() {
|
||||
|
|
|
@ -296,22 +296,26 @@ public class kelondroRowCollection {
|
|||
set(index, a);
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row) {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row) {
|
||||
byte[] r = row.bytes();
|
||||
addUnique(r, 0, r.length);
|
||||
return addUnique(r, 0, r.length);
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) {
|
||||
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) {
|
||||
assert this.sortBound == 0 : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
|
||||
Iterator<kelondroRow.Entry> i = rows.iterator();
|
||||
while (i.hasNext()) addUnique(i.next());
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
if (addUnique(i.next())) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public synchronized void add(byte[] a) {
|
||||
addUnique(a, 0, a.length);
|
||||
}
|
||||
|
||||
private final void addUnique(byte[] a, int astart, int alength) {
|
||||
private final boolean addUnique(byte[] a, int astart, int alength) {
|
||||
assert (a != null);
|
||||
assert (astart >= 0) && (astart < a.length) : " astart = " + a;
|
||||
assert (!(serverLog.allZero(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength);
|
||||
|
@ -319,7 +323,7 @@ public class kelondroRowCollection {
|
|||
assert (astart + alength <= a.length);
|
||||
if (bugappearance(a, astart, alength)) {
|
||||
System.out.println("*** DEBUG: patched wrong a = " + serverLog.arrayList(a, astart, alength));
|
||||
return; // TODO: this is temporary; remote peers may still submit bad entries
|
||||
return false; // TODO: this is temporary; remote peers may still submit bad entries
|
||||
}
|
||||
assert (!(bugappearance(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength);
|
||||
int l = Math.min(rowdef.objectsize, Math.min(alength, a.length - astart));
|
||||
|
@ -327,6 +331,7 @@ public class kelondroRowCollection {
|
|||
System.arraycopy(a, astart, chunkcache, rowdef.objectsize * chunkcount, l);
|
||||
chunkcount++;
|
||||
this.lastTimeWrote = System.currentTimeMillis();
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean bugappearance(byte[] a, int astart, int alength) {
|
||||
|
|
|
@ -209,7 +209,7 @@ public class kelondroSQLTable implements kelondroIndex {
|
|||
}
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
@ -217,7 +217,7 @@ public class kelondroSQLTable implements kelondroIndex {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
|
|
@ -308,15 +308,15 @@ public class kelondroSplitTable implements kelondroIndex {
|
|||
return null;
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
|
||||
addUnique(row, null);
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
|
||||
return addUnique(row, null);
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
|
||||
assert row.objectsize() <= this.rowdef.objectsize;
|
||||
if ((entryDate == null) || (entryDate.after(new Date()))) entryDate = new Date(); // fix date
|
||||
String suffix = dateSuffix(entryDate);
|
||||
if (suffix == null) return;
|
||||
if (suffix == null) return false;
|
||||
kelondroIndex table = (kelondroIndex) tables.get(suffix);
|
||||
if (table == null) {
|
||||
// make new table
|
||||
|
@ -329,12 +329,16 @@ public class kelondroSplitTable implements kelondroIndex {
|
|||
}
|
||||
tables.put(suffix, table);
|
||||
}
|
||||
table.addUnique(row);
|
||||
return table.addUnique(row);
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
Iterator<kelondroRow.Entry> i = rows.iterator();
|
||||
while (i.hasNext()) addUnique(i.next());
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
if (addUnique(i.next())) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows, Date entryDate) throws IOException {
|
||||
|
|
|
@ -489,17 +489,23 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
|
|||
return result;
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
|
||||
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
|
||||
int s = this.size();
|
||||
this.put(row);
|
||||
return this.size() > s;
|
||||
}
|
||||
|
||||
public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
|
||||
this.put(row, entryDate);
|
||||
}
|
||||
|
||||
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
|
||||
Iterator<kelondroRow.Entry> i = rows.iterator();
|
||||
while (i.hasNext()) addUnique(i.next());
|
||||
int c = 0;
|
||||
while (i.hasNext()) {
|
||||
if (addUnique(i.next())) c++;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
private void assignChild(kelondroNode parentNode, kelondroNode childNode, int childType) throws IOException {
|
||||
|
|
|
@ -52,7 +52,6 @@ import java.net.MalformedURLException;
|
|||
import java.net.NoRouteToHostException;
|
||||
import java.net.SocketException;
|
||||
import java.net.UnknownHostException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
|
||||
import de.anomic.http.HttpClient;
|
||||
|
|
|
@ -181,7 +181,7 @@ public final class plasmaCrawlStacker extends Thread {
|
|||
|
||||
public void close() {
|
||||
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
|
||||
this.log.logFine("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait.");
|
||||
this.log.logInfo("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait.");
|
||||
while (size() > 0) {
|
||||
if (!job()) break;
|
||||
}
|
||||
|
|
|
@ -480,9 +480,7 @@ public final class serverCore extends serverAbstractBusyThread implements server
|
|||
Thread.interrupted();
|
||||
|
||||
// shut down all busySessions
|
||||
for (Session session: this.busySessions) {
|
||||
try {session.notify();} catch (IllegalMonitorStateException e) {e.printStackTrace();}
|
||||
try {session.notifyAll();} catch (IllegalMonitorStateException e) {e.printStackTrace();}
|
||||
if (this.busySessions != null) for (Session session: this.busySessions) {
|
||||
try {session.interrupt();} catch (SecurityException e ) {e.printStackTrace();}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ WORDMIGRATION.level = FINE
|
|||
FILEHANDLER.level = INFO
|
||||
SESSION-POOL.level = INFO
|
||||
CRAWLER-POOL.level = INFO
|
||||
STACKCRAWL-POOL.level = INFO
|
||||
STACKCRAWL.level = INFO
|
||||
MEMORY.level = INFO
|
||||
|
||||
# List of global handlers
|
||||
|
|
Loading…
Reference in New Issue
Block a user