mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
1st step to migrate kelondroTree to usage of kelondroRow instead of byte[][]
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2162 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
572d53506c
commit
4a907a570f
|
@ -15,6 +15,7 @@ import de.anomic.kelondro.kelondroBase64Order;
|
||||||
import de.anomic.kelondro.kelondroIndex;
|
import de.anomic.kelondro.kelondroIndex;
|
||||||
import de.anomic.kelondro.kelondroSplittedTree;
|
import de.anomic.kelondro.kelondroSplittedTree;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.ymage.ymageChart;
|
import de.anomic.ymage.ymageChart;
|
||||||
import de.anomic.ymage.ymagePNGEncoderAWT;
|
import de.anomic.ymage.ymagePNGEncoderAWT;
|
||||||
import de.anomic.server.serverMemory;
|
import de.anomic.server.serverMemory;
|
||||||
|
@ -131,10 +132,10 @@ public class dbtest {
|
||||||
public void run() {
|
public void run() {
|
||||||
final STEntry entry = new STEntry(this.getSource());
|
final STEntry entry = new STEntry(this.getSource());
|
||||||
try {
|
try {
|
||||||
final byte[][] entryBytes = getTable().get(entry.getKey());
|
final kelondroRow.Entry entryBytes = getTable().get(entry.getKey());
|
||||||
if (entryBytes != null) {
|
if (entryBytes != null) {
|
||||||
System.out.println("ENTRY=" + new String(entryBytes[1]));
|
System.out.println("ENTRY=" + entryBytes.getColString(1, null));
|
||||||
final STEntry dbEntry = new STEntry(entryBytes[0], entryBytes[1]);
|
final STEntry dbEntry = new STEntry(entryBytes.getColBytes(0), entryBytes.getColBytes(1));
|
||||||
if (!dbEntry.isValid()) {
|
if (!dbEntry.isValid()) {
|
||||||
System.out.println(dbEntry);
|
System.out.println(dbEntry);
|
||||||
} else {
|
} else {
|
||||||
|
@ -353,7 +354,7 @@ final class dbTable implements kelondroIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// kelondroIndex.get as declared in dbTable (diff view: old signature row, then new signature row).
// The body shown performs no lookup and always returns null for any key.
// The change on display only swaps the declared return type from byte[][] to kelondroRow.Entry.
public byte[][] get(byte[] key) throws IOException {
|
public kelondroRow.Entry get(byte[] key) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,7 @@ public class kelondroArray extends kelondroRecords {
|
||||||
|
|
||||||
// define the Over-Head-Array
|
// define the Over-Head-Array
|
||||||
private static short thisOHBytes = 0; // our record definition does not need extra bytes
|
private static short thisOHBytes = 0; // our record definition does not need extra bytes
|
||||||
private static short thisOHHandles = 0; // and two handles overhead for a double-chained list
|
private static short thisOHHandles = 0; // and no handles
|
||||||
|
|
||||||
public kelondroArray(File file, int[] columns, int intprops, boolean exitOnFail) {
|
public kelondroArray(File file, int[] columns, int intprops, boolean exitOnFail) {
|
||||||
// this creates a new array
|
// this creates a new array
|
||||||
|
@ -96,13 +96,13 @@ public class kelondroArray extends kelondroRecords {
|
||||||
return row().newEntry(getNode(new Handle(index)).getValueRow());
|
return row().newEntry(getNode(new Handle(index)).getValueRow());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stores a new Handle built from 'value' at handle slot 'index' and returns the
// hashCode() of the handle that occupied the slot before the write.
// The diff narrows the visibility of this method from public to protected.
// NOTE(review): presumably Handle.hashCode() yields the handle's integer index - confirm in kelondroRecords.
public synchronized int seti(int index, int value) throws IOException {
|
protected synchronized int seti(int index, int value) throws IOException {
|
||||||
// remember the previous slot content before it is overwritten
int before = getHandle(index).hashCode();
|
int before = getHandle(index).hashCode();
|
||||||
setHandle(index, new Handle(value));
|
setHandle(index, new Handle(value));
|
||||||
return before;
|
return before;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the hashCode() of the handle stored at handle slot 'index'.
// The diff narrows the visibility of this method from public to protected.
// NOTE(review): presumably Handle.hashCode() yields the handle's integer index - confirm in kelondroRecords.
public synchronized int geti(int index) {
|
protected synchronized int geti(int index) {
|
||||||
return getHandle(index).hashCode();
|
return getHandle(index).hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ public class kelondroCollectionIndex {
|
||||||
if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());
|
if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());
|
||||||
|
|
||||||
// first find an old entry, if one exists
|
// first find an old entry, if one exists
|
||||||
byte[][] oldindexrow = index.get(key);
|
kelondroRow.Entry oldindexrow = index.get(key);
|
||||||
|
|
||||||
// define the new storage array
|
// define the new storage array
|
||||||
byte[][] newarrayrow = new byte[][]{key,
|
byte[][] newarrayrow = new byte[][]{key,
|
||||||
|
@ -144,8 +144,8 @@ public class kelondroCollectionIndex {
|
||||||
// overwrite the old collection
|
// overwrite the old collection
|
||||||
// read old information
|
// read old information
|
||||||
//int chunksize = (int) kelondroNaturalOrder.decodeLong(oldindexrow[1]); // needed only for migration
|
//int chunksize = (int) kelondroNaturalOrder.decodeLong(oldindexrow[1]); // needed only for migration
|
||||||
int chunkcount = (int) kelondroNaturalOrder.decodeLong(oldindexrow[2]);
|
int chunkcount = (int) oldindexrow.getColLongB256(2);
|
||||||
int rownumber = (int) kelondroNaturalOrder.decodeLong(oldindexrow[3]);
|
int rownumber = (int) oldindexrow.getColLongB256(3);
|
||||||
int oldPartitionNumber = arrayIndex(chunkcount);
|
int oldPartitionNumber = arrayIndex(chunkcount);
|
||||||
int newPartitionNumber = arrayIndex(collection.size());
|
int newPartitionNumber = arrayIndex(collection.size());
|
||||||
|
|
||||||
|
@ -178,12 +178,12 @@ public class kelondroCollectionIndex {
|
||||||
|
|
||||||
public kelondroCollection get(byte[] key) throws IOException {
|
public kelondroCollection get(byte[] key) throws IOException {
|
||||||
// find an entry, if one exists
|
// find an entry, if one exists
|
||||||
byte[][] indexrow = index.get(key);
|
kelondroRow.Entry indexrow = index.get(key);
|
||||||
if (indexrow == null) return null;
|
if (indexrow == null) return null;
|
||||||
// read values
|
// read values
|
||||||
int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]);
|
int chunksize = (int) indexrow.getColLongB256(1);
|
||||||
int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]);
|
int chunkcount = (int) indexrow.getColLongB256(2);
|
||||||
int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]);
|
int rownumber = (int) indexrow.getColLongB256(3);
|
||||||
int partitionnumber = arrayIndex(chunkcount);
|
int partitionnumber = arrayIndex(chunkcount);
|
||||||
// open array entry
|
// open array entry
|
||||||
kelondroRow.Entry arrayrow = array[partitionnumber].get(rownumber);
|
kelondroRow.Entry arrayrow = array[partitionnumber].get(rownumber);
|
||||||
|
@ -196,12 +196,12 @@ public class kelondroCollectionIndex {
|
||||||
|
|
||||||
public void remove(byte[] key) throws IOException {
|
public void remove(byte[] key) throws IOException {
|
||||||
// find an entry, if one exists
|
// find an entry, if one exists
|
||||||
byte[][] indexrow = index.get(key);
|
kelondroRow.Entry indexrow = index.get(key);
|
||||||
if (indexrow == null) return;
|
if (indexrow == null) return;
|
||||||
// read values
|
// read values
|
||||||
//int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]);
|
//int chunksize = (int) kelondroNaturalOrder.decodeLong(indexrow[1]);
|
||||||
int chunkcount = (int) kelondroNaturalOrder.decodeLong(indexrow[2]);
|
int chunkcount = (int) indexrow.getColLongB256(2);
|
||||||
int rownumber = (int) kelondroNaturalOrder.decodeLong(indexrow[3]);
|
int rownumber = (int) indexrow.getColLongB256(3);
|
||||||
int partitionnumber = arrayIndex(chunkcount);
|
int partitionnumber = arrayIndex(chunkcount);
|
||||||
// remove array entry
|
// remove array entry
|
||||||
array[partitionnumber].remove(rownumber);
|
array[partitionnumber].remove(rownumber);
|
||||||
|
|
|
@ -195,11 +195,11 @@ public class kelondroDyn extends kelondroTree {
|
||||||
if (buffered != null) return buffered;
|
if (buffered != null) return buffered;
|
||||||
|
|
||||||
// read from db
|
// read from db
|
||||||
byte[][] result = get(key);
|
kelondroRow.Entry result = get(key);
|
||||||
if (result == null) return null;
|
if (result == null) return null;
|
||||||
|
|
||||||
// return result
|
// return result
|
||||||
return result[1];
|
return result.getColBytes(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void setValueCached(byte[] key, byte[] value) throws IOException {
|
private synchronized void setValueCached(byte[] key, byte[] value) throws IOException {
|
||||||
|
|
|
@ -187,7 +187,7 @@ public class kelondroDynTree {
|
||||||
byte[][] entry = (byte[][]) tcache.get(key);
|
byte[][] entry = (byte[][]) tcache.get(key);
|
||||||
if (entry == null) {
|
if (entry == null) {
|
||||||
kelondroTree t = getTree(this.tablename);
|
kelondroTree t = getTree(this.tablename);
|
||||||
entry = t.get(key);
|
entry = t.get(key).getCols();
|
||||||
t.close();
|
t.close();
|
||||||
this.tcache.put(key, entry);
|
this.tcache.put(key, entry);
|
||||||
this.timestamp = System.currentTimeMillis();
|
this.timestamp = System.currentTimeMillis();
|
||||||
|
|
|
@ -86,7 +86,7 @@ public class kelondroFScoreCluster {
|
||||||
|
|
||||||
public void addScore(String word) throws IOException {
|
public void addScore(String word) throws IOException {
|
||||||
word = word.toLowerCase();
|
word = word.toLowerCase();
|
||||||
byte[][] record = refcountDB.get(word.getBytes());
|
kelondroRow.Entry record = refcountDB.get(word.getBytes());
|
||||||
long c;
|
long c;
|
||||||
String cs;
|
String cs;
|
||||||
if (record == null) {
|
if (record == null) {
|
||||||
|
@ -94,7 +94,7 @@ public class kelondroFScoreCluster {
|
||||||
c = 0;
|
c = 0;
|
||||||
} else {
|
} else {
|
||||||
// delete old entry
|
// delete old entry
|
||||||
c = kelondroBase64Order.enhancedCoder.decodeLong(new String(record[1]));
|
c = record.getColLongB64E(1);
|
||||||
cs = kelondroBase64Order.enhancedCoder.encodeLong(c, countlength);
|
cs = kelondroBase64Order.enhancedCoder.encodeLong(c, countlength);
|
||||||
countrefDB.remove((cs + word).getBytes());
|
countrefDB.remove((cs + word).getBytes());
|
||||||
c++;
|
c++;
|
||||||
|
@ -116,11 +116,11 @@ public class kelondroFScoreCluster {
|
||||||
|
|
||||||
// Returns the score stored for 'word' in refcountDB, or 0 when refcountDB has no record for it.
// The word is lower-cased before it is used as the lookup key.
// Old side of the diff: column 1 of the byte[][] record is decoded with
// kelondroBase64Order.enhancedCoder.decodeLong; new side: record.getColLongB64E(1).
// NOTE(review): toLowerCase() and getBytes() are called without an explicit locale/charset,
// so the key bytes depend on platform defaults - confirm this matches how keys are written.
public long getScore(String word) throws IOException {
|
public long getScore(String word) throws IOException {
|
||||||
word = word.toLowerCase();
|
word = word.toLowerCase();
|
||||||
byte[][] record = refcountDB.get(word.getBytes());
|
kelondroRow.Entry record = refcountDB.get(word.getBytes());
|
||||||
if (record == null) {
|
if (record == null) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
return kelondroBase64Order.enhancedCoder.decodeLong(new String(record[1]));
|
return record.getColLongB64E(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ public interface kelondroIndex {
|
||||||
public int columns();
|
public int columns();
|
||||||
public int columnSize(int column);
|
public int columnSize(int column);
|
||||||
|
|
||||||
public byte[][] get(byte[] key) throws IOException;
|
public kelondroRow.Entry get(byte[] key) throws IOException;
|
||||||
public byte[][] put(byte[][] row) throws IOException;
|
public byte[][] put(byte[][] row) throws IOException;
|
||||||
public byte[][] remove(byte[] key) throws IOException;
|
public byte[][] remove(byte[] key) throws IOException;
|
||||||
//public Iterator rows(boolean up, boolean rotating, byte[] startKey) throws IOException; // Objects are of type byte[][]
|
//public Iterator rows(boolean up, boolean rotating, byte[] startKey) throws IOException; // Objects are of type byte[][]
|
||||||
|
|
|
@ -1136,7 +1136,7 @@ public class kelondroRecords {
|
||||||
}
|
}
|
||||||
|
|
||||||
public class contentIterator implements Iterator {
|
public class contentIterator implements Iterator {
|
||||||
// iterator that iterates all byte[][]-objects in the file
|
// iterator that iterates all kelondroRow.Entry-objects in the file
|
||||||
// all records that are marked as deleted are omitted
|
// all records that are marked as deleted are omitted
|
||||||
// this is probably also the fastest way to iterate all objects
|
// this is probably also the fastest way to iterate all objects
|
||||||
|
|
||||||
|
@ -1167,7 +1167,7 @@ public class kelondroRecords {
|
||||||
Node n = new Node(pos);
|
Node n = new Node(pos);
|
||||||
pos.index++;
|
pos.index++;
|
||||||
while ((markedDeleted.contains(pos)) && (pos.index < USAGE.allCount())) pos.index++;
|
while ((markedDeleted.contains(pos)) && (pos.index < USAGE.allCount())) pos.index++;
|
||||||
return row().newEntry(n.getValueRow()).getCols();
|
return row().newEntry(n.getValueRow());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new kelondroException(filename, e.getMessage());
|
throw new kelondroException(filename, e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,10 +113,12 @@ public class kelondroRow {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates an Entry from a single byte[] row instance.
// The line added by this diff makes the factory null-tolerant: a null argument
// yields null instead of being passed on to the Entry constructor.
public Entry newEntry(byte[] rowinstance) {
|
public Entry newEntry(byte[] rowinstance) {
|
||||||
|
if (rowinstance == null) return null;
|
||||||
return new Entry(rowinstance);
|
return new Entry(rowinstance);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates an Entry from a byte[][] of cells.
// The line added by this diff makes the factory null-tolerant: a null argument
// yields null instead of being passed on to the Entry constructor.
public Entry newEntry(byte[][] cells) {
|
public Entry newEntry(byte[][] cells) {
|
||||||
|
if (cells == null) return null;
|
||||||
return new Entry(cells);
|
return new Entry(cells);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -141,7 +141,7 @@ public class kelondroSplittedTree implements kelondroIndex {
|
||||||
return (int) order.partition(key, ff);
|
return (int) order.partition(key, ff);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Looks up 'key' by delegating to the element of ktfs selected by partition(key)
// and returns whatever that element's get(key) returns.
// The diff changes only the declared return type from byte[][] to kelondroRow.Entry.
// NOTE(review): ktfs is presumably the array of partition trees - its declaration is not visible here.
public byte[][] get(byte[] key) throws IOException {
|
public kelondroRow.Entry get(byte[] key) throws IOException {
|
||||||
return ktfs[partition(key)].get(key);
|
return ktfs[partition(key)].get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -126,7 +126,7 @@ public class kelondroTables {
|
||||||
return table.get(key);
|
return table.get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized byte[][] selectByte(String tablename, String key) throws IOException {
|
public synchronized kelondroRow.Entry selectByte(String tablename, String key) throws IOException {
|
||||||
kelondroTree tree = (kelondroTree) tTables.get(tablename);
|
kelondroTree tree = (kelondroTree) tTables.get(tablename);
|
||||||
if (tree == null) throw new RuntimeException("kelondroTables.selectByte: tree table '" + tablename + "' does not exist.");
|
if (tree == null) throw new RuntimeException("kelondroTables.selectByte: tree table '" + tablename + "' does not exist.");
|
||||||
return tree.get(key.getBytes());
|
return tree.get(key.getBytes());
|
||||||
|
|
|
@ -221,6 +221,27 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the value to which this map maps the specified key.
|
// Returns the value to which this map maps the specified key.
|
||||||
|
// Looks up 'key' and returns the matching row as a kelondroRow.Entry, or null if no match is found.
// Steps, as written below:
//   1. if an objectCache exists, try it first and return a cached Entry directly;
//   2. if the cache's has(key) reports -1, return null without searching;
//   3. otherwise run writeSearchObj on the key while holding its monitor, wrap the matched
//      node's value row in an Entry, and record the outcome in the cache (put / hasnot).
// NOTE(review): has(key) == -1 presumably means "known to be absent" - confirm in the cache class.
// NOTE(review): the objectCache reads in steps 1 and 2 happen outside the synchronized block.
public kelondroRow.Entry get(byte[] key) throws IOException {
|
||||||
|
// System.out.println("kelondroTree.get " + new String(key) + " in " + filename);
|
||||||
|
kelondroRow.Entry result = (objectCache == null) ? null : (kelondroRow.Entry) objectCache.get(key);
|
||||||
|
if (result != null) {
|
||||||
|
//System.out.println("cache hit in objectCache, db:" + super.filename);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
if ((objectCache != null) && (objectCache.has(key) == -1)) return null;
|
||||||
|
synchronized (writeSearchObj) {
|
||||||
|
writeSearchObj.process(key);
|
||||||
|
if (writeSearchObj.found()) {
|
||||||
|
// build the Entry from the value row of the node the search matched
result = row().newEntry(writeSearchObj.getMatcher().getValueRow());
|
||||||
|
if (objectCache != null) objectCache.put(key, result);
|
||||||
|
} else {
|
||||||
|
result = null;
|
||||||
|
// remember the miss in the cache
if (objectCache != null) objectCache.hasnot(key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
/*
|
||||||
public byte[][] get(byte[] key) throws IOException {
|
public byte[][] get(byte[] key) throws IOException {
|
||||||
// System.out.println("kelondroTree.get " + new String(key) + " in " + filename);
|
// System.out.println("kelondroTree.get " + new String(key) + " in " + filename);
|
||||||
kelondroRow.Entry result = (objectCache == null) ? null : (kelondroRow.Entry) objectCache.get(key);
|
kelondroRow.Entry result = (objectCache == null) ? null : (kelondroRow.Entry) objectCache.get(key);
|
||||||
|
@ -241,6 +262,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
|
||||||
}
|
}
|
||||||
return (result == null) ? null : result.getCols();
|
return (result == null) ? null : result.getCols();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
public class Search {
|
public class Search {
|
||||||
|
|
||||||
|
@ -365,10 +387,10 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true when the overhead handle number 'child' of parentn equals childn's handle,
// and false when that overhead handle is null or differs.
// Throws IllegalArgumentException if childn is null.
// NOTE(review): the exception text says "isLeftChild" although the method is isChild.
// NOTE(review): parentn is not null-checked; a null parentn fails at getOHHandle with a NullPointerException.
public synchronized boolean isChild(Node childn, Node parentn, int child) {
|
public synchronized boolean isChild(Node childn, Node parentn, int child) {
|
||||||
if (childn == null) throw new IllegalArgumentException("isLeftChild: Node parameter is NULL");
|
if (childn == null) throw new IllegalArgumentException("isLeftChild: Node parameter is NULL");
|
||||||
Handle lc = parentn.getOHHandle(child);
|
Handle lc = parentn.getOHHandle(child);
|
||||||
if (lc == null) return false;
|
if (lc == null) return false;
|
||||||
return (lc.equals(childn.handle()));
|
return (lc.equals(childn.handle()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Associates the specified value with the specified key in this map
|
// Associates the specified value with the specified key in this map
|
||||||
|
@ -1360,8 +1382,8 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
|
||||||
}
|
}
|
||||||
} else if (args[0].equals("-g")) {
|
} else if (args[0].equals("-g")) {
|
||||||
kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10);
|
kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10);
|
||||||
byte[][] ret2 = fm.get(args[2].getBytes());
|
kelondroRow.Entry ret2 = fm.get(args[2].getBytes());
|
||||||
ret = ((ret2 == null) ? null : ret2[1]);
|
ret = ((ret2 == null) ? null : ret2.getColBytes(1));
|
||||||
fm.close();
|
fm.close();
|
||||||
} else if (args[0].equals("-n")) {
|
} else if (args[0].equals("-n")) {
|
||||||
kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10);
|
kelondroTree fm = new kelondroTree(new File(args[1]), 0x100000, 10);
|
||||||
|
|
|
@ -4,6 +4,7 @@ import java.io.File;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import de.anomic.index.indexContainer;
|
import de.anomic.index.indexContainer;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
import de.anomic.plasma.plasmaWordIndexAssortment;
|
import de.anomic.plasma.plasmaWordIndexAssortment;
|
||||||
|
|
||||||
|
@ -93,10 +94,10 @@ public class plasmaWordIndexAssortmentImporter extends AbstractImporter implemen
|
||||||
this.wordEntityCount++;
|
this.wordEntityCount++;
|
||||||
|
|
||||||
// getting next entry as byte array
|
// getting next entry as byte array
|
||||||
byte[][] row = (byte[][]) contentIter.next();
|
kelondroRow.Entry row = (kelondroRow.Entry) contentIter.next();
|
||||||
|
|
||||||
// getting the word hash
|
// getting the word hash
|
||||||
String hash = new String(row[0]);
|
String hash = row.getColString(0, null);
|
||||||
|
|
||||||
// creating a word entry container
|
// creating a word entry container
|
||||||
indexContainer container;
|
indexContainer container;
|
||||||
|
|
|
@ -54,6 +54,7 @@ import java.util.Iterator;
|
||||||
|
|
||||||
import de.anomic.index.indexURL;
|
import de.anomic.index.indexURL;
|
||||||
import de.anomic.kelondro.kelondroBase64Order;
|
import de.anomic.kelondro.kelondroBase64Order;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
import de.anomic.tools.bitfield;
|
import de.anomic.tools.bitfield;
|
||||||
|
|
||||||
|
@ -178,18 +179,18 @@ public class plasmaCrawlEURL extends indexURL {
|
||||||
// - look into the filed properties
|
// - look into the filed properties
|
||||||
// if the url cannot be found, this returns null
|
// if the url cannot be found, this returns null
|
||||||
this.hash = hash;
|
this.hash = hash;
|
||||||
byte[][] entry = urlHashCache.get(hash.getBytes());
|
kelondroRow.Entry entry = urlHashCache.get(hash.getBytes());
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
this.referrer = new String(entry[1], "UTF-8");
|
this.referrer = entry.getColString(1, "UTF-8");
|
||||||
this.initiator = new String(entry[2], "UTF-8");
|
this.initiator = entry.getColString(2, "UTF-8");
|
||||||
this.executor = new String(entry[3], "UTF-8");
|
this.executor = entry.getColString(3, "UTF-8");
|
||||||
this.url = new URL(new String(entry[4], "UTF-8").trim());
|
this.url = new URL(entry.getColString(4, "UTF-8").trim());
|
||||||
this.name = new String(entry[5], "UTF-8").trim();
|
this.name = entry.getColString(5, "UTF-8").trim();
|
||||||
this.initdate = new Date(86400000 * kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[6], "UTF-8")));
|
this.initdate = new Date(86400000 * entry.getColLongB64E(6));
|
||||||
this.trydate = new Date(86400000 * kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[7], "UTF-8")));
|
this.trydate = new Date(86400000 * entry.getColLongB64E(7));
|
||||||
this.trycount = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[8], "UTF-8"));
|
this.trycount = (int) entry.getColLongB64E(8);
|
||||||
this.failreason = new String(entry[9], "UTF-8");
|
this.failreason = entry.getColString(9, "UTF-8");
|
||||||
this.flags = new bitfield(entry[10]);
|
this.flags = new bitfield(entry.getColBytes(10));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,6 +70,7 @@ import de.anomic.index.indexURL;
|
||||||
import de.anomic.index.indexURLEntry;
|
import de.anomic.index.indexURLEntry;
|
||||||
import de.anomic.kelondro.kelondroBase64Order;
|
import de.anomic.kelondro.kelondroBase64Order;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.plasma.plasmaHTCache;
|
import de.anomic.plasma.plasmaHTCache;
|
||||||
import de.anomic.server.serverCodings;
|
import de.anomic.server.serverCodings;
|
||||||
import de.anomic.server.serverObjects;
|
import de.anomic.server.serverObjects;
|
||||||
|
@ -459,22 +460,22 @@ public final class plasmaCrawlLURL extends indexURL {
|
||||||
// - look into the filed properties
|
// - look into the filed properties
|
||||||
// if the url cannot be found, this returns null
|
// if the url cannot be found, this returns null
|
||||||
this.urlHash = urlHash;
|
this.urlHash = urlHash;
|
||||||
byte[][] entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
|
kelondroRow.Entry entry = plasmaCrawlLURL.this.urlHashCache.get(urlHash.getBytes());
|
||||||
if (entry == null) throw new IOException("url hash " + urlHash + " not found in LURL");
|
if (entry == null) throw new IOException("url hash " + urlHash + " not found in LURL");
|
||||||
try {
|
try {
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
this.url = new URL(new String(entry[1], "UTF-8").trim());
|
this.url = new URL(entry.getColString(1, "UTF-8").trim());
|
||||||
this.descr = (entry[2] == null) ? this.url.toString() : new String(entry[2], "UTF-8").trim();
|
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
|
||||||
this.moddate = new Date(86400000 * kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[3], "UTF-8")));
|
this.moddate = new Date(86400000 * entry.getColLongB64E(3));
|
||||||
this.loaddate = new Date(86400000 * kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[4], "UTF-8")));
|
this.loaddate = new Date(86400000 * entry.getColLongB64E(4));
|
||||||
this.referrerHash = (entry[5] == null) ? dummyHash : new String(entry[5], "UTF-8");
|
this.referrerHash = (entry.empty(5)) ? dummyHash : entry.getColString(5, "UTF-8");
|
||||||
this.copyCount = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[6], "UTF-8"));
|
this.copyCount = (int) entry.getColLongB64E(6);
|
||||||
this.flags = new String(entry[7], "UTF-8");
|
this.flags = entry.getColString(7, "UTF-8");
|
||||||
this.quality = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[8], "UTF-8"));
|
this.quality = (int) entry.getColLongB64E(8);
|
||||||
this.language = new String(entry[9], "UTF-8");
|
this.language = entry.getColString(9, "UTF-8");
|
||||||
this.doctype = (char) entry[10][0];
|
this.doctype = (char) entry.getColByte(10);
|
||||||
this.size = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[11], "UTF-8"));
|
this.size = (int) entry.getColLongB64E(11);
|
||||||
this.wordCount = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[12], "UTF-8"));
|
this.wordCount = (int) entry.getColLongB64E(12);
|
||||||
this.snippet = null;
|
this.snippet = null;
|
||||||
this.word = searchedWord;
|
this.word = searchedWord;
|
||||||
this.stored = false;
|
this.stored = false;
|
||||||
|
@ -828,10 +829,10 @@ public final class plasmaCrawlLURL extends indexURL {
|
||||||
String oldUrlStr = null;
|
String oldUrlStr = null;
|
||||||
try {
|
try {
|
||||||
// getting the url data as byte array
|
// getting the url data as byte array
|
||||||
byte[][] entry = urlHashCache.get(urlHash.getBytes());
|
kelondroRow.Entry entry = urlHashCache.get(urlHash.getBytes());
|
||||||
|
|
||||||
// getting the wrong url string
|
// getting the wrong url string
|
||||||
oldUrlStr = new String(entry[1]).trim();
|
oldUrlStr = entry.getColString(1, null).trim();
|
||||||
|
|
||||||
int pos = -1;
|
int pos = -1;
|
||||||
if ((pos = oldUrlStr.indexOf("://")) != -1) {
|
if ((pos = oldUrlStr.indexOf("://")) != -1) {
|
||||||
|
@ -844,8 +845,8 @@ public final class plasmaCrawlLURL extends indexURL {
|
||||||
response res = theHttpc.HEAD(newUrl.getPath(), null);
|
response res = theHttpc.HEAD(newUrl.getPath(), null);
|
||||||
|
|
||||||
if (res.statusCode == 200) {
|
if (res.statusCode == 200) {
|
||||||
entry[1] = newUrl.toString().getBytes();
|
entry.setCol(1, newUrl.toString().getBytes());
|
||||||
urlHashCache.put(entry);
|
urlHashCache.put(entry.getCols());
|
||||||
log.logInfo("UrlDB-Entry with urlHash '" + urlHash + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
|
log.logInfo("UrlDB-Entry with urlHash '" + urlHash + "' corrected\n\tURL: " + oldUrlStr + " -> " + newUrlStr);
|
||||||
} else {
|
} else {
|
||||||
remove(urlHash);
|
remove(urlHash);
|
||||||
|
|
|
@ -57,6 +57,7 @@ import de.anomic.kelondro.kelondroException;
|
||||||
import de.anomic.kelondro.kelondroRecords;
|
import de.anomic.kelondro.kelondroRecords;
|
||||||
import de.anomic.kelondro.kelondroStack;
|
import de.anomic.kelondro.kelondroStack;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.server.logging.serverLog;
|
import de.anomic.server.logging.serverLog;
|
||||||
import de.anomic.tools.bitfield;
|
import de.anomic.tools.bitfield;
|
||||||
|
|
||||||
|
@ -507,20 +508,20 @@ public class plasmaCrawlNURL extends indexURL {
|
||||||
// - look into the filed properties
|
// - look into the filed properties
|
||||||
// if the url cannot be found, this returns null
|
// if the url cannot be found, this returns null
|
||||||
this.hash = hash;
|
this.hash = hash;
|
||||||
byte[][] entry = urlHashCache.get(hash.getBytes());
|
kelondroRow.Entry entry = urlHashCache.get(hash.getBytes());
|
||||||
if (entry != null) {
|
if (entry != null) {
|
||||||
//try {
|
//try {
|
||||||
this.initiator = new String(entry[1]);
|
this.initiator = entry.getColString(1, null);
|
||||||
this.url = new URL(new String(entry[2]).trim());
|
this.url = new URL(entry.getColString(2, null).trim());
|
||||||
this.referrer = (entry[3] == null) ? dummyHash : new String(entry[3]);
|
this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null);
|
||||||
this.name = (entry[4] == null) ? "" : new String(entry[4]).trim();
|
this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim();
|
||||||
this.loaddate = new Date(86400000 * kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[5])));
|
this.loaddate = new Date(86400000 * entry.getColLongB64E(5));
|
||||||
this.profileHandle = (entry[6] == null) ? null : new String(entry[6]).trim();
|
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
|
||||||
this.depth = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[7]));
|
this.depth = (int) entry.getColLongB64E(7);
|
||||||
this.anchors = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[8]));
|
this.anchors = (int) entry.getColLongB64E(8);
|
||||||
this.forkfactor = (int) kelondroBase64Order.enhancedCoder.decodeLong(new String(entry[9]));
|
this.forkfactor = (int) entry.getColLongB64E(9);
|
||||||
this.flags = new bitfield(entry[10]);
|
this.flags = new bitfield(entry.getColBytes(10));
|
||||||
this.handle = Integer.parseInt(new String(entry[11]));
|
this.handle = Integer.parseInt(entry.getColString(11, null), 16);
|
||||||
return;
|
return;
|
||||||
//} catch (MalformedURLException e) {
|
//} catch (MalformedURLException e) {
|
||||||
// throw new IOException("plasmaCrawlNURL/Entry: " + e);
|
// throw new IOException("plasmaCrawlNURL/Entry: " + e);
|
||||||
|
|
|
@ -63,6 +63,7 @@ import de.anomic.index.indexURLEntry;
|
||||||
import de.anomic.kelondro.kelondroException;
|
import de.anomic.kelondro.kelondroException;
|
||||||
import de.anomic.kelondro.kelondroNaturalOrder;
|
import de.anomic.kelondro.kelondroNaturalOrder;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.server.logging.serverLog;
|
import de.anomic.server.logging.serverLog;
|
||||||
|
|
||||||
public final class plasmaWordIndexAssortment {
|
public final class plasmaWordIndexAssortment {
|
||||||
|
@ -160,9 +161,9 @@ public final class plasmaWordIndexAssortment {
|
||||||
public indexContainer remove(String wordHash) {
|
public indexContainer remove(String wordHash) {
|
||||||
// deletes a word index from assortment database
|
// deletes a word index from assortment database
|
||||||
// and returns the content record
|
// and returns the content record
|
||||||
byte[][] row = null;
|
kelondroRow.Entry row = null;
|
||||||
try {
|
try {
|
||||||
row = assortments.remove(wordHash.getBytes());
|
row = assortments.row().newEntry(assortments.remove(wordHash.getBytes()));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
log.logSevere("removeAssortment/IO-error: " + e.getMessage()
|
log.logSevere("removeAssortment/IO-error: " + e.getMessage()
|
||||||
+ " - reset assortment-DB " + assortments.file(), e);
|
+ " - reset assortment-DB " + assortments.file(), e);
|
||||||
|
@ -180,7 +181,7 @@ public final class plasmaWordIndexAssortment {
|
||||||
public boolean contains(String wordHash) {
|
public boolean contains(String wordHash) {
|
||||||
// gets a word index from assortment database
|
// gets a word index from assortment database
|
||||||
// and returns the content record
|
// and returns the content record
|
||||||
byte[][] row = null;
|
kelondroRow.Entry row = null;
|
||||||
try {
|
try {
|
||||||
row = assortments.get(wordHash.getBytes());
|
row = assortments.get(wordHash.getBytes());
|
||||||
return (row != null);
|
return (row != null);
|
||||||
|
@ -197,7 +198,7 @@ public final class plasmaWordIndexAssortment {
|
||||||
public indexContainer get(String wordHash) {
|
public indexContainer get(String wordHash) {
|
||||||
// gets a word index from assortment database
|
// gets a word index from assortment database
|
||||||
// and returns the content record
|
// and returns the content record
|
||||||
byte[][] row = null;
|
kelondroRow.Entry row = null;
|
||||||
try {
|
try {
|
||||||
row = assortments.get(wordHash.getBytes());
|
row = assortments.get(wordHash.getBytes());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -214,14 +215,14 @@ public final class plasmaWordIndexAssortment {
|
||||||
return row2container(wordHash, row);
|
return row2container(wordHash, row);
|
||||||
}
|
}
|
||||||
|
|
||||||
public indexContainer row2container(String wordHash, byte[][] row) {
|
public indexContainer row2container(String wordHash, kelondroRow.Entry row) {
|
||||||
if (row == null) return null;
|
if (row == null) return null;
|
||||||
final long updateTime = kelondroNaturalOrder.decodeLong(row[2]);
|
final long updateTime = row.getColLongB256(2);
|
||||||
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
|
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
|
||||||
for (int i = 0; i < assortmentLength; i++) {
|
for (int i = 0; i < assortmentLength; i++) {
|
||||||
container.add(
|
container.add(
|
||||||
new indexURLEntry[] { new indexURLEntry(
|
new indexURLEntry[] { new indexURLEntry(
|
||||||
new String(row[3 + 2 * i]), new String(row[4 + 2 * i])) }, updateTime);
|
new String(row.getColBytes(3 + 2 * i)), new String(row.getColBytes(4 + 2 * i))) }, updateTime);
|
||||||
}
|
}
|
||||||
return container;
|
return container;
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,6 +53,7 @@ import de.anomic.index.indexURL;
|
||||||
import de.anomic.index.indexURLEntry;
|
import de.anomic.index.indexURLEntry;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
import de.anomic.kelondro.kelondroException;
|
import de.anomic.kelondro.kelondroException;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.server.logging.serverLog;
|
import de.anomic.server.logging.serverLog;
|
||||||
|
|
||||||
public final class plasmaWordIndexFile {
|
public final class plasmaWordIndexFile {
|
||||||
|
@ -137,9 +138,9 @@ public final class plasmaWordIndexFile {
|
||||||
}
|
}
|
||||||
|
|
||||||
public indexURLEntry getEntry(String urlhash) throws IOException {
|
public indexURLEntry getEntry(String urlhash) throws IOException {
|
||||||
byte[][] n = theIndex.get(urlhash.getBytes());
|
kelondroRow.Entry n = theIndex.get(urlhash.getBytes());
|
||||||
if (n == null) return null;
|
if (n == null) return null;
|
||||||
return new indexURLEntry(new String(n[0]), new String(n[1]));
|
return new indexURLEntry(n.getColString(0, null), n.getColString(1, null));
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean contains(String urlhash) throws IOException {
|
public boolean contains(String urlhash) throws IOException {
|
||||||
|
|
|
@ -52,6 +52,7 @@ import de.anomic.yacy.yacyCore;
|
||||||
import de.anomic.kelondro.kelondroBase64Order;
|
import de.anomic.kelondro.kelondroBase64Order;
|
||||||
import de.anomic.kelondro.kelondroTree;
|
import de.anomic.kelondro.kelondroTree;
|
||||||
import de.anomic.kelondro.kelondroException;
|
import de.anomic.kelondro.kelondroException;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.server.serverCodings;
|
import de.anomic.server.serverCodings;
|
||||||
import de.anomic.server.serverDate;
|
import de.anomic.server.serverDate;
|
||||||
|
|
||||||
|
@ -169,6 +170,17 @@ public class yacyNewsDB {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static yacyNewsRecord b2r(kelondroRow.Entry b) {
|
||||||
|
if (b == null) return null;
|
||||||
|
return new yacyNewsRecord(
|
||||||
|
b.getColString(0, null),
|
||||||
|
b.getColString(1, null),
|
||||||
|
(b.empty(2)) ? null : yacyCore.parseUniversalDate(b.getColString(2, null), serverDate.UTCDiffString()),
|
||||||
|
(int) b.getColLongB64E(3),
|
||||||
|
serverCodings.string2map(b.getColString(4, null))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
private static yacyNewsRecord b2r(byte[][] b) {
|
private static yacyNewsRecord b2r(byte[][] b) {
|
||||||
if (b == null) return null;
|
if (b == null) return null;
|
||||||
return new yacyNewsRecord(
|
return new yacyNewsRecord(
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
// done inside the copyright notive above. A re-distribution must contain
|
// done inside the copyright notive above. A re-distribution must contain
|
||||||
// the intact and unchanged copyright notice.
|
// the intact and unchanged copyright notice.
|
||||||
// Contributions and changes to the program code must be marked as such.
|
// Contributions and changes to the program code must be marked as such.
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
import java.io.BufferedOutputStream;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.BufferedWriter;
|
import java.io.BufferedWriter;
|
||||||
|
@ -77,6 +78,7 @@ import de.anomic.index.indexURL;
|
||||||
import de.anomic.kelondro.kelondroDyn;
|
import de.anomic.kelondro.kelondroDyn;
|
||||||
import de.anomic.kelondro.kelondroMScoreCluster;
|
import de.anomic.kelondro.kelondroMScoreCluster;
|
||||||
import de.anomic.kelondro.kelondroMap;
|
import de.anomic.kelondro.kelondroMap;
|
||||||
|
import de.anomic.kelondro.kelondroRow;
|
||||||
import de.anomic.plasma.plasmaCrawlLURL;
|
import de.anomic.plasma.plasmaCrawlLURL;
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
import de.anomic.plasma.plasmaURLPool;
|
import de.anomic.plasma.plasmaURLPool;
|
||||||
|
@ -752,8 +754,8 @@ public final class yacy {
|
||||||
while (contentIter.hasNext()) {
|
while (contentIter.hasNext()) {
|
||||||
wordEntityCount++;
|
wordEntityCount++;
|
||||||
|
|
||||||
byte[][] row = (byte[][]) contentIter.next();
|
kelondroRow.Entry row = (kelondroRow.Entry) contentIter.next();
|
||||||
String hash = new String(row[0]);
|
String hash = row.getColString(0, null);
|
||||||
indexContainer container = assortmentFile.row2container(hash, row);
|
indexContainer container = assortmentFile.row2container(hash, row);
|
||||||
wordEntryCount += container.size();
|
wordEntryCount += container.size();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user