mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- removed locks from WordReference
- refactored HeapReader/HeapWriter

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7514 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
cd19d0517e
commit
5e45ded8e2
|
@ -42,6 +42,7 @@ import net.yacy.kelondro.io.Writer;
|
||||||
import net.yacy.kelondro.logging.Log;
|
import net.yacy.kelondro.logging.Log;
|
||||||
import net.yacy.kelondro.order.ByteOrder;
|
import net.yacy.kelondro.order.ByteOrder;
|
||||||
import net.yacy.kelondro.order.CloneableIterator;
|
import net.yacy.kelondro.order.CloneableIterator;
|
||||||
|
import net.yacy.kelondro.order.Digest;
|
||||||
import net.yacy.kelondro.order.NaturalOrder;
|
import net.yacy.kelondro.order.NaturalOrder;
|
||||||
import net.yacy.kelondro.order.RotateIterator;
|
import net.yacy.kelondro.order.RotateIterator;
|
||||||
import net.yacy.kelondro.util.FileUtils;
|
import net.yacy.kelondro.util.FileUtils;
|
||||||
|
@ -147,17 +148,17 @@ public class HeapReader {
|
||||||
private boolean initIndexReadDump() {
|
private boolean initIndexReadDump() {
|
||||||
// look for an index dump and read it if it exist
|
// look for an index dump and read it if it exist
|
||||||
// if this is successful, return true; otherwise false
|
// if this is successful, return true; otherwise false
|
||||||
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile);
|
String fingerprint = fingerprintFileHash(this.heapFile);
|
||||||
if (fingerprint == null) {
|
if (fingerprint == null) {
|
||||||
Log.logSevere("HeapReader", "cannot generate a fingerprint for " + this.heapFile + ": null");
|
Log.logSevere("HeapReader", "cannot generate a fingerprint for " + this.heapFile + ": null");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
File fif = HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint);
|
File fif = fingerprintIndexFile(this.heapFile, fingerprint);
|
||||||
if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz");
|
if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz");
|
||||||
File fgf = HeapWriter.fingerprintGapFile(this.heapFile, fingerprint);
|
File fgf = fingerprintGapFile(this.heapFile, fingerprint);
|
||||||
if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz");
|
if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz");
|
||||||
if (!fif.exists() || !fgf.exists()) {
|
if (!fif.exists() || !fgf.exists()) {
|
||||||
HeapWriter.deleteAllFingerprints(this.heapFile);
|
deleteAllFingerprints(this.heapFile, fif.getName(), fgf.getName());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,6 +195,41 @@ public class HeapReader {
|
||||||
return !this.index.isEmpty();
|
return !this.index.isEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static File fingerprintIndexFile(File f, String fingerprint) {
|
||||||
|
assert f != null;
|
||||||
|
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".idx");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static File fingerprintGapFile(File f, String fingerprint) {
|
||||||
|
assert f != null;
|
||||||
|
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".gap");
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static String fingerprintFileHash(File f) {
|
||||||
|
assert f != null;
|
||||||
|
assert f.exists() : "file = " + f.toString();
|
||||||
|
String fp = Digest.fastFingerprintB64(f, false);
|
||||||
|
assert fp != null : "file = " + f.toString();
|
||||||
|
if (fp == null) return null;
|
||||||
|
return fp.substring(0, 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void deleteAllFingerprints(File f, String exception1, String exception2) {
|
||||||
|
File d = f.getParentFile();
|
||||||
|
String n = f.getName();
|
||||||
|
String[] l = d.list();
|
||||||
|
for (int i = 0; i < l.length; i++) {
|
||||||
|
if (!l[i].startsWith(n)) continue;
|
||||||
|
if (exception1 != null && l[i].equals(exception1)) continue;
|
||||||
|
if (exception2 != null && l[i].equals(exception2)) continue;
|
||||||
|
if (l[i].endsWith(".idx") ||
|
||||||
|
l[i].endsWith(".gap") ||
|
||||||
|
l[i].endsWith(".idx.gz") ||
|
||||||
|
l[i].endsWith(".gap.gz")
|
||||||
|
) FileUtils.deletedelete(new File(d, l[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void initIndexReadFromHeap() throws IOException {
|
private void initIndexReadFromHeap() throws IOException {
|
||||||
// this initializes the this.index object by reading positions from the heap file
|
// this initializes the this.index object by reading positions from the heap file
|
||||||
Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. Please wait.");
|
Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. Please wait.");
|
||||||
|
@ -513,16 +549,16 @@ public class HeapReader {
|
||||||
// to speed up the next start
|
// to speed up the next start
|
||||||
try {
|
try {
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile);
|
String fingerprint = fingerprintFileHash(this.heapFile);
|
||||||
if (fingerprint == null) {
|
if (fingerprint == null) {
|
||||||
Log.logSevere("kelondroBLOBHeap", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null");
|
Log.logSevere("kelondroBLOBHeap", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null");
|
||||||
} else {
|
} else {
|
||||||
free.dump(HeapWriter.fingerprintGapFile(this.heapFile, fingerprint));
|
free.dump(fingerprintGapFile(this.heapFile, fingerprint));
|
||||||
}
|
}
|
||||||
free.clear();
|
free.clear();
|
||||||
free = null;
|
free = null;
|
||||||
if (fingerprint != null) {
|
if (fingerprint != null) {
|
||||||
index.dump(HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint));
|
index.dump(fingerprintIndexFile(this.heapFile, fingerprint));
|
||||||
Log.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
|
Log.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
|
||||||
}
|
}
|
||||||
index.close();
|
index.close();
|
||||||
|
|
|
@ -34,7 +34,6 @@ import net.yacy.kelondro.index.HandleMap;
|
||||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||||
import net.yacy.kelondro.logging.Log;
|
import net.yacy.kelondro.logging.Log;
|
||||||
import net.yacy.kelondro.order.ByteOrder;
|
import net.yacy.kelondro.order.ByteOrder;
|
||||||
import net.yacy.kelondro.order.Digest;
|
|
||||||
import net.yacy.kelondro.util.FileUtils;
|
import net.yacy.kelondro.util.FileUtils;
|
||||||
|
|
||||||
|
|
||||||
|
@ -110,34 +109,6 @@ public final class HeapWriter {
|
||||||
//os.flush(); // necessary? may cause bad IO performance :-(
|
//os.flush(); // necessary? may cause bad IO performance :-(
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static File fingerprintIndexFile(File f, String fingerprint) {
|
|
||||||
assert f != null;
|
|
||||||
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".idx");
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static File fingerprintGapFile(File f, String fingerprint) {
|
|
||||||
assert f != null;
|
|
||||||
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".gap");
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static String fingerprintFileHash(File f) {
|
|
||||||
assert f != null;
|
|
||||||
assert f.exists() : "file = " + f.toString();
|
|
||||||
String fp = Digest.fastFingerprintB64(f, false);
|
|
||||||
assert fp != null : "file = " + f.toString();
|
|
||||||
if (fp == null) return null;
|
|
||||||
return fp.substring(0, 12);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void deleteAllFingerprints(File f) {
|
|
||||||
File d = f.getParentFile();
|
|
||||||
String n = f.getName();
|
|
||||||
String[] l = d.list();
|
|
||||||
for (int i = 0; i < l.length; i++) {
|
|
||||||
if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap") || l[i].endsWith(".idx.gz") || l[i].endsWith(".gap.gz"))) FileUtils.deletedelete(new File(d, l[i]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* close the BLOB table
|
* close the BLOB table
|
||||||
* @throws
|
* @throws
|
||||||
|
@ -160,12 +131,12 @@ public final class HeapWriter {
|
||||||
// now we can create a dump of the index and the gap information
|
// now we can create a dump of the index and the gap information
|
||||||
// to speed up the next start
|
// to speed up the next start
|
||||||
long start = System.currentTimeMillis();
|
long start = System.currentTimeMillis();
|
||||||
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFileREADY);
|
String fingerprint = HeapReader.fingerprintFileHash(this.heapFileREADY);
|
||||||
if (fingerprint == null) {
|
if (fingerprint == null) {
|
||||||
Log.logSevere("kelondroBLOBHeapWriter", "cannot write a dump for " + heapFileREADY.getName()+ ": fingerprint is null");
|
Log.logSevere("kelondroBLOBHeapWriter", "cannot write a dump for " + heapFileREADY.getName()+ ": fingerprint is null");
|
||||||
} else {
|
} else {
|
||||||
new Gap().dump(fingerprintGapFile(this.heapFileREADY, fingerprint));
|
new Gap().dump(HeapReader.fingerprintGapFile(this.heapFileREADY, fingerprint));
|
||||||
index.dump(fingerprintIndexFile(this.heapFileREADY, fingerprint));
|
index.dump(HeapReader.fingerprintIndexFile(this.heapFileREADY, fingerprint));
|
||||||
Log.logInfo("kelondroBLOBHeapWriter", "wrote a dump for the " + this.index.size() + " index entries of " + heapFileREADY.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
|
Log.logInfo("kelondroBLOBHeapWriter", "wrote a dump for the " + this.index.size() + " index entries of " + heapFileREADY.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
|
||||||
}
|
}
|
||||||
index.close();
|
index.close();
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.citation;
|
package net.yacy.kelondro.data.citation;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.data.word.Word;
|
import net.yacy.kelondro.data.word.Word;
|
||||||
import net.yacy.kelondro.index.Column;
|
import net.yacy.kelondro.index.Column;
|
||||||
import net.yacy.kelondro.index.Row;
|
import net.yacy.kelondro.index.Row;
|
||||||
|
@ -214,7 +216,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,9 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.image;
|
package net.yacy.kelondro.data.image;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.data.word.Word;
|
import net.yacy.kelondro.data.word.Word;
|
||||||
import net.yacy.kelondro.index.Column;
|
import net.yacy.kelondro.index.Column;
|
||||||
import net.yacy.kelondro.index.Row;
|
import net.yacy.kelondro.index.Row;
|
||||||
|
@ -226,8 +229,8 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
|
||||||
return (int) this.entry.getColLong(col_hitcount);
|
return (int) this.entry.getColLong(col_hitcount);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
return 1;
|
return new ArrayList<Integer>(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int position(int p) {
|
public int position(int p) {
|
||||||
|
|
|
@ -26,7 +26,8 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.image;
|
package net.yacy.kelondro.data.image;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.Collection;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
|
||||||
import net.yacy.kelondro.index.Row.Entry;
|
import net.yacy.kelondro.index.Row.Entry;
|
||||||
import net.yacy.kelondro.order.Bitfield;
|
import net.yacy.kelondro.order.Bitfield;
|
||||||
|
@ -53,7 +54,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
posinphrase, posofphrase,
|
posinphrase, posofphrase,
|
||||||
urlcomps, urllength, virtualAge,
|
urlcomps, urllength, virtualAge,
|
||||||
wordsintext, wordsintitle;
|
wordsintext, wordsintitle;
|
||||||
private final ArrayList<Integer> positions;
|
private final ConcurrentLinkedQueue<Integer> positions;
|
||||||
public double termFrequency;
|
public double termFrequency;
|
||||||
|
|
||||||
public ImageReferenceVars(
|
public ImageReferenceVars(
|
||||||
|
@ -64,7 +65,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
final int hitcount, // how often appears this word in the text
|
final int hitcount, // how often appears this word in the text
|
||||||
final int wordcount, // total number of words
|
final int wordcount, // total number of words
|
||||||
final int phrasecount, // total number of phrases
|
final int phrasecount, // total number of phrases
|
||||||
final ArrayList<Integer> ps, // positions of words that are joined into the reference
|
final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the reference
|
||||||
final int posinphrase, // position of word in its phrase
|
final int posinphrase, // position of word in its phrase
|
||||||
final int posofphrase, // number of the phrase where word appears
|
final int posofphrase, // number of the phrase where word appears
|
||||||
final long lastmodified, // last-modified time of the document where word appears
|
final long lastmodified, // last-modified time of the document where word appears
|
||||||
|
@ -89,8 +90,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
this.llocal = outlinksSame;
|
this.llocal = outlinksSame;
|
||||||
this.lother = outlinksOther;
|
this.lother = outlinksOther;
|
||||||
this.phrasesintext = phrasecount;
|
this.phrasesintext = phrasecount;
|
||||||
this.positions = new ArrayList<Integer>(ps.size());
|
this.positions = new ConcurrentLinkedQueue<Integer>();
|
||||||
for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i));
|
for (Integer i: ps) this.positions.add(i);
|
||||||
this.posinphrase = posinphrase;
|
this.posinphrase = posinphrase;
|
||||||
this.posofphrase = posofphrase;
|
this.posofphrase = posofphrase;
|
||||||
this.urlcomps = urlComps;
|
this.urlcomps = urlComps;
|
||||||
|
@ -112,8 +113,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
this.llocal = e.llocal();
|
this.llocal = e.llocal();
|
||||||
this.lother = e.lother();
|
this.lother = e.lother();
|
||||||
this.phrasesintext = e.phrasesintext();
|
this.phrasesintext = e.phrasesintext();
|
||||||
this.positions = new ArrayList<Integer>(e.positions());
|
this.positions = new ConcurrentLinkedQueue<Integer>();
|
||||||
for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i));
|
for (Integer i: e.positions()) this.positions.add(i);
|
||||||
this.posinphrase = e.posinphrase();
|
this.posinphrase = e.posinphrase();
|
||||||
this.posofphrase = e.posofphrase();
|
this.posofphrase = e.posofphrase();
|
||||||
this.urlcomps = e.urlcomps();
|
this.urlcomps = e.urlcomps();
|
||||||
|
@ -227,12 +228,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
return posinphrase;
|
return posinphrase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
return this.positions.size();
|
return this.positions;
|
||||||
}
|
|
||||||
|
|
||||||
public int position(int p) {
|
|
||||||
return this.positions.get(p);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int posofphrase() {
|
public int posofphrase() {
|
||||||
|
@ -248,7 +245,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
hitcount, // how often appears this word in the text
|
hitcount, // how often appears this word in the text
|
||||||
wordsintext, // total number of words
|
wordsintext, // total number of words
|
||||||
phrasesintext, // total number of phrases
|
phrasesintext, // total number of phrases
|
||||||
positions.get(0), // position of word in all words
|
positions.iterator().next(), // position of word in all words
|
||||||
posinphrase, // position of word in its phrase
|
posinphrase, // position of word in its phrase
|
||||||
posofphrase, // number of the phrase where word appears
|
posofphrase, // number of the phrase where word appears
|
||||||
lastModified, // last-modified time of the document where word appears
|
lastModified, // last-modified time of the document where word appears
|
||||||
|
@ -347,7 +344,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
|
||||||
|
|
||||||
// combine the distance
|
// combine the distance
|
||||||
ImageReference oe = (ImageReference) r;
|
ImageReference oe = (ImageReference) r;
|
||||||
for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i));
|
for (Integer i: r.positions()) this.positions.add(i);
|
||||||
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
|
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
|
||||||
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
|
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.navigation;
|
package net.yacy.kelondro.data.navigation;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.data.word.Word;
|
import net.yacy.kelondro.data.word.Word;
|
||||||
import net.yacy.kelondro.index.Column;
|
import net.yacy.kelondro.index.Column;
|
||||||
import net.yacy.kelondro.index.Row;
|
import net.yacy.kelondro.index.Row;
|
||||||
|
@ -174,7 +176,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.navigation;
|
package net.yacy.kelondro.data.navigation;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.index.Row.Entry;
|
import net.yacy.kelondro.index.Row.Entry;
|
||||||
import net.yacy.kelondro.rwi.AbstractReference;
|
import net.yacy.kelondro.rwi.AbstractReference;
|
||||||
import net.yacy.kelondro.rwi.Reference;
|
import net.yacy.kelondro.rwi.Reference;
|
||||||
|
@ -146,7 +148,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
throw new UnsupportedOperationException();
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,9 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.word;
|
package net.yacy.kelondro.data.word;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.index.Column;
|
import net.yacy.kelondro.index.Column;
|
||||||
import net.yacy.kelondro.index.Row;
|
import net.yacy.kelondro.index.Row;
|
||||||
import net.yacy.kelondro.index.Row.Entry;
|
import net.yacy.kelondro.index.Row.Entry;
|
||||||
|
@ -257,8 +260,8 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
|
||||||
return (int) this.entry.getColLong(col_hitcount);
|
return (int) this.entry.getColLong(col_hitcount);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
return 1;
|
return new ArrayList<Integer>(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int position(final int p) {
|
public int position(final int p) {
|
||||||
|
|
|
@ -26,11 +26,10 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.data.word;
|
package net.yacy.kelondro.data.word;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.Semaphore;
|
import java.util.concurrent.Semaphore;
|
||||||
|
|
||||||
|
@ -62,7 +61,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
posinphrase, posofphrase,
|
posinphrase, posofphrase,
|
||||||
urlcomps, urllength, virtualAge,
|
urlcomps, urllength, virtualAge,
|
||||||
wordsintext, wordsintitle;
|
wordsintext, wordsintitle;
|
||||||
private final List<Integer> positions;
|
private final ConcurrentLinkedQueue<Integer> positions;
|
||||||
public double termFrequency;
|
public double termFrequency;
|
||||||
|
|
||||||
public WordReferenceVars(
|
public WordReferenceVars(
|
||||||
|
@ -73,7 +72,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
final int hitcount, // how often appears this word in the text
|
final int hitcount, // how often appears this word in the text
|
||||||
final int wordcount, // total number of words
|
final int wordcount, // total number of words
|
||||||
final int phrasecount, // total number of phrases
|
final int phrasecount, // total number of phrases
|
||||||
final List<Integer> ps, // positions of words that are joined into the reference
|
final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the reference
|
||||||
final int posinphrase, // position of word in its phrase
|
final int posinphrase, // position of word in its phrase
|
||||||
final int posofphrase, // number of the phrase where word appears
|
final int posofphrase, // number of the phrase where word appears
|
||||||
final long lastmodified, // last-modified time of the document where word appears
|
final long lastmodified, // last-modified time of the document where word appears
|
||||||
|
@ -98,8 +97,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
this.llocal = outlinksSame;
|
this.llocal = outlinksSame;
|
||||||
this.lother = outlinksOther;
|
this.lother = outlinksOther;
|
||||||
this.phrasesintext = phrasecount;
|
this.phrasesintext = phrasecount;
|
||||||
this.positions = Collections.synchronizedList(new ArrayList<Integer>(ps.size()));
|
this.positions = new ConcurrentLinkedQueue<Integer>();
|
||||||
for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i));
|
for (Integer i: ps) this.positions.add(i);
|
||||||
this.posinphrase = posinphrase;
|
this.posinphrase = posinphrase;
|
||||||
this.posofphrase = posofphrase;
|
this.posofphrase = posofphrase;
|
||||||
this.urlcomps = urlComps;
|
this.urlcomps = urlComps;
|
||||||
|
@ -121,8 +120,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
this.llocal = e.llocal();
|
this.llocal = e.llocal();
|
||||||
this.lother = e.lother();
|
this.lother = e.lother();
|
||||||
this.phrasesintext = e.phrasesintext();
|
this.phrasesintext = e.phrasesintext();
|
||||||
this.positions = new ArrayList<Integer>(e.positions());
|
this.positions = new ConcurrentLinkedQueue<Integer>();
|
||||||
for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i));
|
for (Integer i: e.positions()) this.positions.add(i);
|
||||||
this.posinphrase = e.posinphrase();
|
this.posinphrase = e.posinphrase();
|
||||||
this.posofphrase = e.posofphrase();
|
this.posofphrase = e.posofphrase();
|
||||||
this.urlcomps = e.urlcomps();
|
this.urlcomps = e.urlcomps();
|
||||||
|
@ -237,14 +236,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
return posinphrase;
|
return posinphrase;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int positions() {
|
public Collection<Integer> positions() {
|
||||||
return this.positions.size();
|
return this.positions;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int position(final int p) {
|
|
||||||
return this.positions.get(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int posofphrase() {
|
public int posofphrase() {
|
||||||
return posofphrase;
|
return posofphrase;
|
||||||
}
|
}
|
||||||
|
@ -258,7 +253,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
hitcount, // how often appears this word in the text
|
hitcount, // how often appears this word in the text
|
||||||
wordsintext, // total number of words
|
wordsintext, // total number of words
|
||||||
phrasesintext, // total number of phrases
|
phrasesintext, // total number of phrases
|
||||||
positions.get(0), // position of word in all words
|
positions.size() == 0 ? 1 : positions.iterator().next(), // position of word in all words
|
||||||
posinphrase, // position of word in its phrase
|
posinphrase, // position of word in its phrase
|
||||||
posofphrase, // number of the phrase where word appears
|
posofphrase, // number of the phrase where word appears
|
||||||
lastModified, // last-modified time of the document where word appears
|
lastModified, // last-modified time of the document where word appears
|
||||||
|
@ -357,7 +352,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
||||||
|
|
||||||
// combine the distance
|
// combine the distance
|
||||||
WordReference oe = (WordReference) r;
|
WordReference oe = (WordReference) r;
|
||||||
for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i));
|
for (Integer i: r.positions()) this.positions.add(i);
|
||||||
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
|
this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0;
|
||||||
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
|
this.posofphrase = Math.min(this.posofphrase, oe.posofphrase());
|
||||||
|
|
||||||
|
|
|
@ -26,26 +26,27 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.rwi;
|
package net.yacy.kelondro.rwi;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.Collection;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
||||||
public abstract class AbstractReference implements Reference {
|
public abstract class AbstractReference implements Reference {
|
||||||
|
|
||||||
protected static void a(List<Integer> a, int i) {
|
protected static void a(Collection<Integer> a, int i) {
|
||||||
assert a != null;
|
assert a != null;
|
||||||
if (i < 0) return; // signal for 'do nothing'
|
if (i < 0) return; // signal for 'do nothing'
|
||||||
synchronized (a) {
|
a.clear();
|
||||||
a.clear();
|
a.add(i);
|
||||||
a.add(i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
protected static int max(List<Integer> a, List<Integer> b) {
|
|
||||||
|
protected static int max(Collection<Integer> a, Collection<Integer> b) {
|
||||||
assert a != null;
|
assert a != null;
|
||||||
if (a.size() == 0) return max(b);
|
if (a.size() == 0) return max(b);
|
||||||
if (b.size() == 0) return max(a);
|
if (b.size() == 0) return max(a);
|
||||||
return Math.max(max(a), max(b));
|
return Math.max(max(a), max(b));
|
||||||
}
|
}
|
||||||
protected static int min(List<Integer> a, List<Integer> b) {
|
|
||||||
|
protected static int min(Collection<Integer> a, Collection<Integer> b) {
|
||||||
assert a != null;
|
assert a != null;
|
||||||
if (a.size() == 0) return min(b);
|
if (a.size() == 0) return min(b);
|
||||||
if (b.size() == 0) return min(a);
|
if (b.size() == 0) return min(a);
|
||||||
|
@ -56,46 +57,56 @@ public abstract class AbstractReference implements Reference {
|
||||||
return Math.min(ma, mb);
|
return Math.min(ma, mb);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int max(List<Integer> a) {
|
private static int max(Collection<Integer> a) {
|
||||||
assert a != null;
|
assert a != null;
|
||||||
if (a.size() == 0) return -1;
|
if (a.size() == 0) return -1;
|
||||||
if (a.size() == 1) return a.get(0);
|
Iterator<Integer> i = a.iterator();
|
||||||
if (a.size() == 2) return Math.max(a.get(0), a.get(1));
|
if (a.size() == 1) return i.next();
|
||||||
int r = a.get(0);
|
if (a.size() == 2) return Math.max(i.next(), i.next());
|
||||||
for (int i = 1; i < a.size(); i++) if (a.get(i) > r) r = a.get(i);
|
int r = i.next();
|
||||||
|
int s;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
s = i.next();
|
||||||
|
if (s > r) r = s;
|
||||||
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
private static int min(List<Integer> a) {
|
|
||||||
|
private static int min(Collection<Integer> a) {
|
||||||
assert a != null;
|
assert a != null;
|
||||||
if (a.size() == 0) return -1;
|
if (a.size() == 0) return -1;
|
||||||
if (a.size() == 1) return a.get(0);
|
Iterator<Integer> i = a.iterator();
|
||||||
if (a.size() == 2) return Math.min(a.get(0), a.get(1));
|
if (a.size() == 1) return i.next();
|
||||||
int r = a.get(0);
|
if (a.size() == 2) return Math.min(i.next(), i.next());
|
||||||
for (int i = 1; i < a.size(); i++) if (a.get(i) < r) r = a.get(i);
|
int r = i.next();
|
||||||
|
int s;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
s = i.next();
|
||||||
|
if (s <r) r = s;
|
||||||
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int maxposition() {
|
public int maxposition() {
|
||||||
assert positions() > 0;
|
assert positions().size() > 0;
|
||||||
if (positions() == 1) return position(0);
|
return max(positions());
|
||||||
int p = position(0);
|
|
||||||
for (int i = positions() - 1; i > 0; i--) if (position(i) > p) p = position(i);
|
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int minposition() {
|
public int minposition() {
|
||||||
assert positions() > 0;
|
assert positions().size() > 0;
|
||||||
if (positions() == 1) return position(0);
|
return min(positions());
|
||||||
int p = position(0);
|
|
||||||
for (int i = positions() - 1; i > 0; i--) if (position(i) < p) p = position(i);
|
|
||||||
return p;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int distance() {
|
public int distance() {
|
||||||
|
if (positions().size() < 2) return 0;
|
||||||
int d = 0;
|
int d = 0;
|
||||||
for (int i = 0; i < this.positions() - 1; i++) {
|
Iterator<Integer> i = positions().iterator();
|
||||||
d += Math.abs(this.position(i) - this.position(i + 1));
|
int s0 = i.next(), s1;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
s1 = i.next();
|
||||||
|
d += Math.abs(s0 - s1);
|
||||||
|
s0 = s1;
|
||||||
}
|
}
|
||||||
return d;
|
return d / (positions().size() - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
|
|
||||||
package net.yacy.kelondro.rwi;
|
package net.yacy.kelondro.rwi;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import net.yacy.kelondro.index.Row.Entry;
|
import net.yacy.kelondro.index.Row.Entry;
|
||||||
|
|
||||||
public interface Reference {
|
public interface Reference {
|
||||||
|
@ -48,14 +50,12 @@ public interface Reference {
|
||||||
|
|
||||||
public void join(final Reference oe);
|
public void join(final Reference oe);
|
||||||
|
|
||||||
public int positions();
|
public Collection<Integer> positions();
|
||||||
|
|
||||||
public int maxposition();
|
public int maxposition();
|
||||||
|
|
||||||
public int minposition();
|
public int minposition();
|
||||||
|
|
||||||
public int position(int p);
|
|
||||||
|
|
||||||
public int distance();
|
public int distance();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user