mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
performance hack
This commit is contained in:
parent
41536eb4a2
commit
ef78f22ee1
|
@ -47,7 +47,7 @@ public class SnippetExtractor {
|
||||||
int linenumber = 0;
|
int linenumber = 0;
|
||||||
int fullmatchcounter = 0;
|
int fullmatchcounter = 0;
|
||||||
lookup: for (final StringBuilder sentence: sentences) {
|
lookup: for (final StringBuilder sentence: sentences) {
|
||||||
hs = WordTokenizer.hashSentence(sentence.toString(), null);
|
hs = WordTokenizer.hashSentence(sentence.toString(), null, 100);
|
||||||
positions = new TreeSet<Integer>();
|
positions = new TreeSet<Integer>();
|
||||||
for (final byte[] word: queryhashes) {
|
for (final byte[] word: queryhashes) {
|
||||||
pos = hs.get(word);
|
pos = hs.get(word);
|
||||||
|
@ -79,27 +79,27 @@ public class SnippetExtractor {
|
||||||
} catch (UnsupportedOperationException e) {
|
} catch (UnsupportedOperationException e) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
snippetString = tsr.snippetString;
|
this.snippetString = tsr.snippetString;
|
||||||
if (snippetString != null && snippetString.length() > 0) {
|
if (this.snippetString != null && this.snippetString.length() > 0) {
|
||||||
remainingHashes = tsr.remainingHashes;
|
this.remainingHashes = tsr.remainingHashes;
|
||||||
if (remainingHashes.isEmpty()) {
|
if (this.remainingHashes.isEmpty()) {
|
||||||
// we have found the snippet
|
// we have found the snippet
|
||||||
return; // finished!
|
return; // finished!
|
||||||
} else if (remainingHashes.size() < queryhashes.size()) {
|
} else if (this.remainingHashes.size() < queryhashes.size()) {
|
||||||
// the result has not all words in it.
|
// the result has not all words in it.
|
||||||
// find another sentence that represents the missing other words
|
// find another sentence that represents the missing other words
|
||||||
// and find recursively more sentences
|
// and find recursively more sentences
|
||||||
maxLength = maxLength - snippetString.length();
|
maxLength = maxLength - this.snippetString.length();
|
||||||
if (maxLength < 20) maxLength = 20;
|
if (maxLength < 20) maxLength = 20;
|
||||||
try {
|
try {
|
||||||
tsr = new SnippetExtractor(order.values(), remainingHashes, maxLength);
|
tsr = new SnippetExtractor(order.values(), this.remainingHashes, maxLength);
|
||||||
} catch (UnsupportedOperationException e) {
|
} catch (UnsupportedOperationException e) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
final String nextSnippet = tsr.snippetString;
|
final String nextSnippet = tsr.snippetString;
|
||||||
if (nextSnippet == null) return;
|
if (nextSnippet == null) return;
|
||||||
snippetString = snippetString + (" / " + nextSnippet);
|
this.snippetString = this.snippetString + (" / " + nextSnippet);
|
||||||
remainingHashes = tsr.remainingHashes;
|
this.remainingHashes = tsr.remainingHashes;
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
// error
|
// error
|
||||||
|
@ -126,7 +126,7 @@ public class SnippetExtractor {
|
||||||
byte[] hash;
|
byte[] hash;
|
||||||
|
|
||||||
// find all hashes that appear in the sentence
|
// find all hashes that appear in the sentence
|
||||||
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
|
final Map<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
|
||||||
final Iterator<byte[]> j = queryhashes.iterator();
|
final Iterator<byte[]> j = queryhashes.iterator();
|
||||||
Integer pos;
|
Integer pos;
|
||||||
int p, minpos = sentence.length(), maxpos = -1;
|
int p, minpos = sentence.length(), maxpos = -1;
|
||||||
|
|
|
@ -68,10 +68,12 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean hasMoreElements() {
|
public boolean hasMoreElements() {
|
||||||
return this.buffer != null;
|
return this.buffer != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public StringBuilder nextElement() {
|
public StringBuilder nextElement() {
|
||||||
final StringBuilder r = (this.buffer == null) ? null : this.buffer;
|
final StringBuilder r = (this.buffer == null) ? null : this.buffer;
|
||||||
this.buffer = nextElement0();
|
this.buffer = nextElement0();
|
||||||
|
@ -81,7 +83,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
e.close();
|
this.e.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
|
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
|
||||||
|
@ -139,10 +141,12 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public boolean hasMoreElements() {
|
public boolean hasMoreElements() {
|
||||||
return this.buffer != null;
|
return this.buffer != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public StringBuilder nextElement() {
|
public StringBuilder nextElement() {
|
||||||
final StringBuilder r = this.buffer;
|
final StringBuilder r = this.buffer;
|
||||||
this.buffer = nextElement0();
|
this.buffer = nextElement0();
|
||||||
|
@ -150,7 +154,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
e.close();
|
this.e.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -177,7 +181,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
* @param sentence the sentence to be tokenized
|
* @param sentence the sentence to be tokenized
|
||||||
* @return an ordered map containing word hashes as keys and positions as values. The map is ordered by the hash ordering
|
* @return an ordered map containing word hashes as keys and positions as values. The map is ordered by the hash ordering
|
||||||
*/
|
*/
|
||||||
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib) {
|
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) {
|
||||||
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
|
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
|
||||||
final WordTokenizer words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
|
final WordTokenizer words = new WordTokenizer(new ByteArrayInputStream(UTF8.getBytes(sentence)), meaningLib);
|
||||||
try {
|
try {
|
||||||
|
@ -185,7 +189,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
||||||
StringBuilder word;
|
StringBuilder word;
|
||||||
byte[] hash;
|
byte[] hash;
|
||||||
Integer oldpos;
|
Integer oldpos;
|
||||||
while (words.hasMoreElements()) {
|
while (words.hasMoreElements() && maxlength-- > 0) {
|
||||||
word = words.nextElement();
|
word = words.nextElement();
|
||||||
hash = Word.word2hash(word);
|
hash = Word.word2hash(word);
|
||||||
|
|
||||||
|
|
|
@ -129,12 +129,14 @@ public class Digest {
|
||||||
digest.reset();
|
digest.reset();
|
||||||
} catch (final NoSuchAlgorithmException e) {
|
} catch (final NoSuchAlgorithmException e) {
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
digest.reset(); // they should all be reseted but anyway; this is safe
|
||||||
}
|
}
|
||||||
byte[] keyBytes;
|
byte[] keyBytes;
|
||||||
keyBytes = UTF8.getBytes(key);
|
keyBytes = UTF8.getBytes(key);
|
||||||
digest.update(keyBytes);
|
digest.update(keyBytes);
|
||||||
final byte[] result = digest.digest();
|
final byte[] result = digest.digest();
|
||||||
digest.reset();
|
digest.reset(); // to be prepared for next
|
||||||
try {
|
try {
|
||||||
digestPool.put(digest);
|
digestPool.put(digest);
|
||||||
//System.out.println("Digest Pool size = " + digestPool.size());
|
//System.out.println("Digest Pool size = " + digestPool.size());
|
||||||
|
|
|
@ -51,8 +51,8 @@ import net.yacy.kelondro.order.Base64Order;
|
||||||
import net.yacy.kelondro.util.ByteArray;
|
import net.yacy.kelondro.util.ByteArray;
|
||||||
import net.yacy.repository.Blacklist;
|
import net.yacy.repository.Blacklist;
|
||||||
import net.yacy.search.Switchboard;
|
import net.yacy.search.Switchboard;
|
||||||
import de.anomic.crawler.retrieval.Request;
|
|
||||||
import de.anomic.crawler.ZURL.FailCategory;
|
import de.anomic.crawler.ZURL.FailCategory;
|
||||||
|
import de.anomic.crawler.retrieval.Request;
|
||||||
|
|
||||||
|
|
||||||
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
|
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
|
||||||
|
@ -117,10 +117,12 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
||||||
return Base64Order.enhancedCoder.equal(this.href.hash(), other.href.hash());
|
return Base64Order.enhancedCoder.equal(this.href.hash(), other.href.hash());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public int compareTo(final MediaSnippet o) {
|
public int compareTo(final MediaSnippet o) {
|
||||||
return Base64Order.enhancedCoder.compare(this.href.hash(), o.href.hash());
|
return Base64Order.enhancedCoder.compare(this.href.hash(), o.href.hash());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public int compare(final MediaSnippet o1, final MediaSnippet o2) {
|
public int compare(final MediaSnippet o1, final MediaSnippet o2) {
|
||||||
return o1.compareTo(o2);
|
return o1.compareTo(o2);
|
||||||
}
|
}
|
||||||
|
@ -217,7 +219,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
||||||
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
|
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
|
||||||
// remove all hashes that appear in the sentence
|
// remove all hashes that appear in the sentence
|
||||||
if (sentence == null) return queryhashes;
|
if (sentence == null) return queryhashes;
|
||||||
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null);
|
final SortedMap<byte[], Integer> hs = WordTokenizer.hashSentence(sentence, null, 100);
|
||||||
final Iterator<byte[]> j = queryhashes.iterator();
|
final Iterator<byte[]> j = queryhashes.iterator();
|
||||||
byte[] hash;
|
byte[] hash;
|
||||||
Integer pos;
|
Integer pos;
|
||||||
|
|
|
@ -497,8 +497,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
||||||
|
|
||||||
private static boolean containsAllHashes(
|
private static boolean containsAllHashes(
|
||||||
final String sentence, final HandleSet queryhashes) {
|
final String sentence, final HandleSet queryhashes) {
|
||||||
final SortedMap<byte[], Integer> m =
|
final SortedMap<byte[], Integer> m = WordTokenizer.hashSentence(sentence, null, 100);
|
||||||
WordTokenizer.hashSentence(sentence, null);
|
|
||||||
for (final byte[] b : queryhashes) {
|
for (final byte[] b : queryhashes) {
|
||||||
if (!(m.containsKey(b))) {
|
if (!(m.containsKey(b))) {
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user