- removed locks from WordReference

- refactoring of HeapReader/Writer

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7514 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2011-02-23 00:32:16 +00:00
parent cd19d0517e
commit 5e45ded8e2
11 changed files with 135 additions and 113 deletions

View File

@ -42,6 +42,7 @@ import net.yacy.kelondro.io.Writer;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.order.RotateIterator;
import net.yacy.kelondro.util.FileUtils;
@ -147,17 +148,17 @@ public class HeapReader {
private boolean initIndexReadDump() {
// look for an index dump and read it if it exist
// if this is successful, return true; otherwise false
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile);
String fingerprint = fingerprintFileHash(this.heapFile);
if (fingerprint == null) {
Log.logSevere("HeapReader", "cannot generate a fingerprint for " + this.heapFile + ": null");
return false;
}
File fif = HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint);
File fif = fingerprintIndexFile(this.heapFile, fingerprint);
if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz");
File fgf = HeapWriter.fingerprintGapFile(this.heapFile, fingerprint);
File fgf = fingerprintGapFile(this.heapFile, fingerprint);
if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz");
if (!fif.exists() || !fgf.exists()) {
HeapWriter.deleteAllFingerprints(this.heapFile);
deleteAllFingerprints(this.heapFile, fif.getName(), fgf.getName());
return false;
}
@ -194,6 +195,41 @@ public class HeapReader {
return !this.index.isEmpty();
}
/**
 * Derives the index dump file that belongs to a heap file.
 * The result is located in the parent directory of <code>f</code> and is named
 * <code>&lt;name of f&gt;.&lt;fingerprint&gt;.idx</code>.
 *
 * @param f the heap file; must not be null
 * @param fingerprint the fingerprint string that is embedded in the dump file name
 * @return the index dump file for the given heap file and fingerprint
 */
protected static File fingerprintIndexFile(File f, String fingerprint) {
    assert f != null;
    final StringBuilder dumpName = new StringBuilder(f.getName());
    dumpName.append('.').append(fingerprint).append(".idx");
    return new File(f.getParentFile(), dumpName.toString());
}
/**
 * Derives the gap dump file that belongs to a heap file.
 * The result is located in the parent directory of <code>f</code> and is named
 * <code>&lt;name of f&gt;.&lt;fingerprint&gt;.gap</code>.
 *
 * @param f the heap file; must not be null
 * @param fingerprint the fingerprint string that is embedded in the dump file name
 * @return the gap dump file for the given heap file and fingerprint
 */
protected static File fingerprintGapFile(File f, String fingerprint) {
    assert f != null;
    final StringBuilder dumpName = new StringBuilder(f.getName());
    dumpName.append('.').append(fingerprint).append(".gap");
    return new File(f.getParentFile(), dumpName.toString());
}
/**
 * Computes the short fingerprint that is used in the names of dump files.
 * The value is the first 12 characters of the string returned by
 * <code>Digest.fastFingerprintB64(f, false)</code>.
 *
 * @param f the file to fingerprint; must not be null and is asserted to exist
 * @return the 12-character fingerprint, or null if the digest call returned null
 */
protected static String fingerprintFileHash(File f) {
    assert f != null;
    assert f.exists() : "file = " + f.toString();
    final String fullFingerprint = Digest.fastFingerprintB64(f, false);
    assert fullFingerprint != null : "file = " + f.toString();
    // with assertions disabled a missing fingerprint is reported to the caller as null
    return (fullFingerprint == null) ? null : fullFingerprint.substring(0, 12);
}
/**
 * Deletes the dump files that accompany a heap file.
 * Every entry in the parent directory of <code>f</code> whose name starts with the
 * name of <code>f</code> and ends with <code>.idx</code>, <code>.gap</code>,
 * <code>.idx.gz</code> or <code>.gap.gz</code> is deleted, except for the two
 * names given as exceptions.
 * If the parent directory cannot be determined or cannot be listed, nothing is deleted.
 *
 * @param f the heap file whose dump files shall be removed
 * @param exception1 a file name that must be kept; may be null
 * @param exception2 a second file name that must be kept; may be null
 */
public static void deleteAllFingerprints(File f, String exception1, String exception2) {
    final File d = f.getParentFile();
    if (d == null) return; // path without a parent component: there is no directory to scan
    final String n = f.getName();
    final String[] l = d.list();
    if (l == null) return; // File.list() yields null if d is not a directory or an I/O error occurs
    for (final String name : l) {
        if (!name.startsWith(n)) continue;
        // equals(null) is false, so a null exception never protects any file
        if (name.equals(exception1) || name.equals(exception2)) continue;
        if (name.endsWith(".idx") ||
            name.endsWith(".gap") ||
            name.endsWith(".idx.gz") ||
            name.endsWith(".gap.gz")
        ) FileUtils.deletedelete(new File(d, name));
    }
}
private void initIndexReadFromHeap() throws IOException {
// this initializes the this.index object by reading positions from the heap file
Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. Please wait.");
@ -513,16 +549,16 @@ public class HeapReader {
// to speed up the next start
try {
long start = System.currentTimeMillis();
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile);
String fingerprint = fingerprintFileHash(this.heapFile);
if (fingerprint == null) {
Log.logSevere("kelondroBLOBHeap", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null");
} else {
free.dump(HeapWriter.fingerprintGapFile(this.heapFile, fingerprint));
free.dump(fingerprintGapFile(this.heapFile, fingerprint));
}
free.clear();
free = null;
if (fingerprint != null) {
index.dump(HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint));
index.dump(fingerprintIndexFile(this.heapFile, fingerprint));
Log.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
}
index.close();

View File

@ -34,7 +34,6 @@ import net.yacy.kelondro.index.HandleMap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.FileUtils;
@ -110,34 +109,6 @@ public final class HeapWriter {
//os.flush(); // necessary? may cause bad IO performance :-(
}
/**
 * Returns the index dump file for a heap file: a file in the parent directory
 * of <code>f</code> named <code>&lt;name of f&gt;.&lt;fingerprint&gt;.idx</code>.
 *
 * @param f the heap file; must not be null
 * @param fingerprint the fingerprint string that becomes part of the file name
 * @return the index dump file
 */
protected static File fingerprintIndexFile(File f, String fingerprint) {
    assert f != null;
    final File directory = f.getParentFile();
    final String dumpName = f.getName() + "." + fingerprint + ".idx";
    return new File(directory, dumpName);
}
/**
 * Returns the gap dump file for a heap file: a file in the parent directory
 * of <code>f</code> named <code>&lt;name of f&gt;.&lt;fingerprint&gt;.gap</code>.
 *
 * @param f the heap file; must not be null
 * @param fingerprint the fingerprint string that becomes part of the file name
 * @return the gap dump file
 */
protected static File fingerprintGapFile(File f, String fingerprint) {
    assert f != null;
    final File directory = f.getParentFile();
    final String dumpName = f.getName() + "." + fingerprint + ".gap";
    return new File(directory, dumpName);
}
/**
 * Produces the short fingerprint used in dump file names: the first 12 characters
 * of the string returned by <code>Digest.fastFingerprintB64(f, false)</code>.
 *
 * @param f the file to fingerprint; must not be null and is asserted to exist
 * @return the 12-character fingerprint, or null if the digest call returned null
 */
protected static String fingerprintFileHash(File f) {
    assert f != null;
    assert f.exists() : "file = " + f.toString();
    final String digest = Digest.fastFingerprintB64(f, false);
    assert digest != null : "file = " + f.toString();
    if (digest != null) {
        return digest.substring(0, 12);
    }
    // only reachable when assertions are disabled
    return null;
}
/**
 * Deletes all dump files that accompany a heap file.
 * Every entry in the parent directory of <code>f</code> whose name starts with the
 * name of <code>f</code> and ends with <code>.idx</code>, <code>.gap</code>,
 * <code>.idx.gz</code> or <code>.gap.gz</code> is deleted.
 * If the parent directory cannot be determined or cannot be listed, nothing is deleted.
 *
 * @param f the heap file whose dump files shall be removed
 */
public static void deleteAllFingerprints(File f) {
    final File d = f.getParentFile();
    if (d == null) return; // path without a parent component: there is no directory to scan
    final String n = f.getName();
    final String[] l = d.list();
    if (l == null) return; // File.list() yields null if d is not a directory or an I/O error occurs
    for (final String name : l) {
        if (!name.startsWith(n)) continue;
        if (name.endsWith(".idx") || name.endsWith(".gap") || name.endsWith(".idx.gz") || name.endsWith(".gap.gz")) {
            FileUtils.deletedelete(new File(d, name));
        }
    }
}
/**
* close the BLOB table
* @throws
@ -160,12 +131,12 @@ public final class HeapWriter {
// now we can create a dump of the index and the gap information
// to speed up the next start
long start = System.currentTimeMillis();
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFileREADY);
String fingerprint = HeapReader.fingerprintFileHash(this.heapFileREADY);
if (fingerprint == null) {
Log.logSevere("kelondroBLOBHeapWriter", "cannot write a dump for " + heapFileREADY.getName()+ ": fingerprint is null");
} else {
new Gap().dump(fingerprintGapFile(this.heapFileREADY, fingerprint));
index.dump(fingerprintIndexFile(this.heapFileREADY, fingerprint));
new Gap().dump(HeapReader.fingerprintGapFile(this.heapFileREADY, fingerprint));
index.dump(HeapReader.fingerprintIndexFile(this.heapFileREADY, fingerprint));
Log.logInfo("kelondroBLOBHeapWriter", "wrote a dump for the " + this.index.size() + " index entries of " + heapFileREADY.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
}
index.close();

View File

@ -26,6 +26,8 @@
package net.yacy.kelondro.data.citation;
import java.util.Collection;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
@ -214,7 +216,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
throw new UnsupportedOperationException();
}
public int positions() {
public Collection<Integer> positions() {
throw new UnsupportedOperationException();
}

View File

@ -26,6 +26,9 @@
package net.yacy.kelondro.data.image;
import java.util.ArrayList;
import java.util.Collection;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
@ -226,8 +229,8 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
return (int) this.entry.getColLong(col_hitcount);
}
public int positions() {
return 1;
public Collection<Integer> positions() {
return new ArrayList<Integer>(0);
}
public int position(int p) {

View File

@ -26,7 +26,8 @@
package net.yacy.kelondro.data.image;
import java.util.ArrayList;
import java.util.Collection;
import java.util.concurrent.ConcurrentLinkedQueue;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.Bitfield;
@ -53,7 +54,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
posinphrase, posofphrase,
urlcomps, urllength, virtualAge,
wordsintext, wordsintitle;
private final ArrayList<Integer> positions;
private final ConcurrentLinkedQueue<Integer> positions;
public double termFrequency;
public ImageReferenceVars(
@ -64,7 +65,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
final int hitcount, // how often appears this word in the text
final int wordcount, // total number of words
final int phrasecount, // total number of phrases
final ArrayList<Integer> ps, // positions of words that are joined into the reference
final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the reference
final int posinphrase, // position of word in its phrase
final int posofphrase, // number of the phrase where word appears
final long lastmodified, // last-modified time of the document where word appears
@ -89,8 +90,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
this.llocal = outlinksSame;
this.lother = outlinksOther;
this.phrasesintext = phrasecount;
this.positions = new ArrayList<Integer>(ps.size());
for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i));
this.positions = new ConcurrentLinkedQueue<Integer>();
for (Integer i: ps) this.positions.add(i);
this.posinphrase = posinphrase;
this.posofphrase = posofphrase;
this.urlcomps = urlComps;
@ -112,8 +113,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
this.llocal = e.llocal();
this.lother = e.lother();
this.phrasesintext = e.phrasesintext();
this.positions = new ArrayList<Integer>(e.positions());
for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i));
this.positions = new ConcurrentLinkedQueue<Integer>();
for (Integer i: e.positions()) this.positions.add(i);
this.posinphrase = e.posinphrase();
this.posofphrase = e.posofphrase();
this.urlcomps = e.urlcomps();
@ -227,12 +228,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
return posinphrase;
}
public int positions() {
return this.positions.size();
}
public int position(int p) {
return this.positions.get(p);
public Collection<Integer> positions() {
return this.positions;
}
public int posofphrase() {
@ -248,7 +245,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
hitcount, // how often appears this word in the text
wordsintext, // total number of words
phrasesintext, // total number of phrases
positions.get(0), // position of word in all words
positions.iterator().next(), // position of word in all words
posinphrase, // position of word in its phrase
posofphrase, // number of the phrase where word appears
lastModified, // last-modified time of the document where word appears
@ -347,7 +344,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
// combine the distance
ImageReference oe = (ImageReference) r;
for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i));
for (Integer i: r.positions()) this.positions.add(i);
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());

View File

@ -26,6 +26,8 @@
package net.yacy.kelondro.data.navigation;
import java.util.Collection;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
@ -174,7 +176,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N
throw new UnsupportedOperationException();
}
public int positions() {
public Collection<Integer> positions() {
throw new UnsupportedOperationException();
}

View File

@ -26,6 +26,8 @@
package net.yacy.kelondro.data.navigation;
import java.util.Collection;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.AbstractReference;
import net.yacy.kelondro.rwi.Reference;
@ -146,7 +148,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
throw new UnsupportedOperationException();
}
public int positions() {
public Collection<Integer> positions() {
throw new UnsupportedOperationException();
}

View File

@ -26,6 +26,9 @@
package net.yacy.kelondro.data.word;
import java.util.ArrayList;
import java.util.Collection;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
@ -257,8 +260,8 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return (int) this.entry.getColLong(col_hitcount);
}
public int positions() {
return 1;
public Collection<Integer> positions() {
return new ArrayList<Integer>(0);
}
public int position(final int p) {

View File

@ -26,11 +26,10 @@
package net.yacy.kelondro.data.word;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
@ -62,7 +61,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
posinphrase, posofphrase,
urlcomps, urllength, virtualAge,
wordsintext, wordsintitle;
private final List<Integer> positions;
private final ConcurrentLinkedQueue<Integer> positions;
public double termFrequency;
public WordReferenceVars(
@ -73,7 +72,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
final int hitcount, // how often appears this word in the text
final int wordcount, // total number of words
final int phrasecount, // total number of phrases
final List<Integer> ps, // positions of words that are joined into the reference
final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the reference
final int posinphrase, // position of word in its phrase
final int posofphrase, // number of the phrase where word appears
final long lastmodified, // last-modified time of the document where word appears
@ -98,8 +97,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.llocal = outlinksSame;
this.lother = outlinksOther;
this.phrasesintext = phrasecount;
this.positions = Collections.synchronizedList(new ArrayList<Integer>(ps.size()));
for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i));
this.positions = new ConcurrentLinkedQueue<Integer>();
for (Integer i: ps) this.positions.add(i);
this.posinphrase = posinphrase;
this.posofphrase = posofphrase;
this.urlcomps = urlComps;
@ -121,8 +120,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
this.llocal = e.llocal();
this.lother = e.lother();
this.phrasesintext = e.phrasesintext();
this.positions = new ArrayList<Integer>(e.positions());
for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i));
this.positions = new ConcurrentLinkedQueue<Integer>();
for (Integer i: e.positions()) this.positions.add(i);
this.posinphrase = e.posinphrase();
this.posofphrase = e.posofphrase();
this.urlcomps = e.urlcomps();
@ -237,14 +236,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return posinphrase;
}
public int positions() {
return this.positions.size();
public Collection<Integer> positions() {
return this.positions;
}
public int position(final int p) {
return this.positions.get(p);
}
public int posofphrase() {
return posofphrase;
}
@ -258,7 +253,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
hitcount, // how often appears this word in the text
wordsintext, // total number of words
phrasesintext, // total number of phrases
positions.get(0), // position of word in all words
positions.size() == 0 ? 1 : positions.iterator().next(), // position of word in all words
posinphrase, // position of word in its phrase
posofphrase, // number of the phrase where word appears
lastModified, // last-modified time of the document where word appears
@ -357,7 +352,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
// combine the distance
WordReference oe = (WordReference) r;
for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i));
for (Integer i: r.positions()) this.positions.add(i);
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());

View File

@ -26,26 +26,27 @@
package net.yacy.kelondro.rwi;
import java.util.List;
import java.util.Collection;
import java.util.Iterator;
public abstract class AbstractReference implements Reference {
protected static void a(List<Integer> a, int i) {
protected static void a(Collection<Integer> a, int i) {
assert a != null;
if (i < 0) return; // signal for 'do nothing'
synchronized (a) {
a.clear();
a.add(i);
}
a.clear();
a.add(i);
}
protected static int max(List<Integer> a, List<Integer> b) {
protected static int max(Collection<Integer> a, Collection<Integer> b) {
assert a != null;
if (a.size() == 0) return max(b);
if (b.size() == 0) return max(a);
return Math.max(max(a), max(b));
}
protected static int min(List<Integer> a, List<Integer> b) {
protected static int min(Collection<Integer> a, Collection<Integer> b) {
assert a != null;
if (a.size() == 0) return min(b);
if (b.size() == 0) return min(a);
@ -56,46 +57,56 @@ public abstract class AbstractReference implements Reference {
return Math.min(ma, mb);
}
private static int max(List<Integer> a) {
private static int max(Collection<Integer> a) {
assert a != null;
if (a.size() == 0) return -1;
if (a.size() == 1) return a.get(0);
if (a.size() == 2) return Math.max(a.get(0), a.get(1));
int r = a.get(0);
for (int i = 1; i < a.size(); i++) if (a.get(i) > r) r = a.get(i);
Iterator<Integer> i = a.iterator();
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.max(i.next(), i.next());
int r = i.next();
int s;
while (i.hasNext()) {
s = i.next();
if (s > r) r = s;
}
return r;
}
private static int min(List<Integer> a) {
private static int min(Collection<Integer> a) {
assert a != null;
if (a.size() == 0) return -1;
if (a.size() == 1) return a.get(0);
if (a.size() == 2) return Math.min(a.get(0), a.get(1));
int r = a.get(0);
for (int i = 1; i < a.size(); i++) if (a.get(i) < r) r = a.get(i);
Iterator<Integer> i = a.iterator();
if (a.size() == 1) return i.next();
if (a.size() == 2) return Math.min(i.next(), i.next());
int r = i.next();
int s;
while (i.hasNext()) {
s = i.next();
if (s <r) r = s;
}
return r;
}
public int maxposition() {
assert positions() > 0;
if (positions() == 1) return position(0);
int p = position(0);
for (int i = positions() - 1; i > 0; i--) if (position(i) > p) p = position(i);
return p;
assert positions().size() > 0;
return max(positions());
}
public int minposition() {
assert positions() > 0;
if (positions() == 1) return position(0);
int p = position(0);
for (int i = positions() - 1; i > 0; i--) if (position(i) < p) p = position(i);
return p;
assert positions().size() > 0;
return min(positions());
}
public int distance() {
if (positions().size() < 2) return 0;
int d = 0;
for (int i = 0; i < this.positions() - 1; i++) {
d += Math.abs(this.position(i) - this.position(i + 1));
Iterator<Integer> i = positions().iterator();
int s0 = i.next(), s1;
while (i.hasNext()) {
s1 = i.next();
d += Math.abs(s0 - s1);
s0 = s1;
}
return d;
return d / (positions().size() - 1);
}
}

View File

@ -26,6 +26,8 @@
package net.yacy.kelondro.rwi;
import java.util.Collection;
import net.yacy.kelondro.index.Row.Entry;
public interface Reference {
@ -48,14 +50,12 @@ public interface Reference {
public void join(final Reference oe);
public int positions();
public Collection<Integer> positions();
public int maxposition();
public int minposition();
public int position(int p);
public int distance();
}