mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
hacks to prevent storage of data longer than necessary during search and
some speed enhancements. This should reduce the memory usage during heavy-load search a bit.
This commit is contained in:
parent
3c3cb78555
commit
9bb7eab389
|
@ -1095,7 +1095,7 @@ federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
|
|||
federated.service.solr.indexing.sharding = MODULO_HOST_MD5
|
||||
# the lazy attribute causes fields containing "" or 0 to be neither added nor written
|
||||
federated.service.solr.indexing.lazy = true
|
||||
federated.service.solr.indexing.timeout = 10000
|
||||
federated.service.solr.indexing.timeout = 6000
|
||||
|
||||
# temporary definition of backend services to use.
|
||||
# After the migration, a rwi+solr combination is used; solr then contains the content of the previously used metadata-db.
|
||||
|
|
|
@ -31,7 +31,6 @@ import java.io.IOException;
|
|||
import java.net.MalformedURLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Enumeration;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import net.yacy.cora.document.encoding.ASCII;
|
||||
|
@ -290,17 +289,21 @@ public class ViewFile {
|
|||
// Search word highlighting
|
||||
for (final StringBuilder s: sentences) {
|
||||
sentence = s.toString();
|
||||
Enumeration<StringBuilder> tokens = null;
|
||||
tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
|
||||
while (tokens.hasMoreElements()) {
|
||||
token = tokens.nextElement();
|
||||
if (token.length() > 0) {
|
||||
prop.put("viewMode_words_" + i + "_nr", i + 1);
|
||||
prop.put("viewMode_words_" + i + "_word", token.toString());
|
||||
prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
|
||||
dark = !dark;
|
||||
i++;
|
||||
WordTokenizer tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
|
||||
try {
|
||||
while (tokens.hasMoreElements()) {
|
||||
token = tokens.nextElement();
|
||||
if (token.length() > 0) {
|
||||
prop.put("viewMode_words_" + i + "_nr", i + 1);
|
||||
prop.put("viewMode_words_" + i + "_word", token.toString());
|
||||
prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
|
||||
dark = !dark;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
tokens.close();
|
||||
tokens = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,14 +28,15 @@ import java.io.InputStream;
|
|||
import java.io.RandomAccessFile;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.LinkedBlockingDeque;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import net.yacy.cora.document.encoding.UTF8;
|
||||
|
@ -48,7 +49,7 @@ import net.yacy.cora.util.Memory;
|
|||
|
||||
public class Digest {
|
||||
|
||||
public static BlockingQueue<MessageDigest> digestPool = new LinkedBlockingDeque<MessageDigest>();
|
||||
public static Queue<MessageDigest> digestPool = new ConcurrentLinkedQueue<MessageDigest>();
|
||||
|
||||
private static final int md5CacheSize = Math.max(1000, Math.min(1000000, (int) (Memory.available() / 50000L)));
|
||||
private static ARC<String, byte[]> md5Cache = null;
|
||||
|
@ -138,11 +139,8 @@ public class Digest {
|
|||
digest.update(keyBytes);
|
||||
final byte[] result = digest.digest();
|
||||
digest.reset(); // to be prepared for next
|
||||
try {
|
||||
digestPool.put(digest);
|
||||
//System.out.println("Digest Pool size = " + digestPool.size());
|
||||
} catch (final InterruptedException e ) {
|
||||
}
|
||||
digestPool.add(digest);
|
||||
//System.out.println("Digest Pool size = " + digestPool.size());
|
||||
|
||||
// update the cache
|
||||
md5Cache.insertIfAbsent(key, result); // prevent expensive MD5 computation and encoding
|
||||
|
|
|
@ -95,84 +95,85 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
|
|||
buffer.append(document.dc_title().toLowerCase());
|
||||
for (String s:document.dc_description()) buffer.append(s.toLowerCase());
|
||||
buffer.append(document.dc_subject(' ').toLowerCase());
|
||||
final WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
|
||||
try {
|
||||
int score = 0;
|
||||
int score = 0;
|
||||
|
||||
// get phrases
|
||||
final TreeMap<String, YMarkTag> phrases = getPhrases(document, 2);
|
||||
phrases.putAll(getPhrases(document, 3));
|
||||
final Iterator<String> iter = phrases.keySet().iterator();
|
||||
while(iter.hasNext()) {
|
||||
// get phrases
|
||||
final TreeMap<String, YMarkTag> phrases = getPhrases(document, 2);
|
||||
phrases.putAll(getPhrases(document, 3));
|
||||
final Iterator<String> iter = phrases.keySet().iterator();
|
||||
while(iter.hasNext()) {
|
||||
score = 10;
|
||||
final String phrase = iter.next();
|
||||
if(phrases.get(phrase).size() > 3 && phrases.get(phrase).size() < 10) {
|
||||
score = phrases.get(phrase).size() * phrase.split(" ").length * 20;
|
||||
}
|
||||
if(isDigitSpace(phrase)) {
|
||||
score = 10;
|
||||
final String phrase = iter.next();
|
||||
if(phrases.get(phrase).size() > 3 && phrases.get(phrase).size() < 10) {
|
||||
score = phrases.get(phrase).size() * phrase.split(" ").length * 20;
|
||||
}
|
||||
if(isDigitSpace(phrase)) {
|
||||
score = 10;
|
||||
}
|
||||
if(phrases.get(phrase).size() > 2 && buffer.indexOf(phrase) > 1) {
|
||||
score = score * 10;
|
||||
}
|
||||
if (tags.containsKey(phrase)) {
|
||||
score = score * 20;
|
||||
}
|
||||
topwords.add(new YMarkTag(phrase, score));
|
||||
pwords.append(phrase);
|
||||
pwords.append(' ');
|
||||
}
|
||||
|
||||
// loop through potential tag and rank them
|
||||
while(tokens.hasMoreElements()) {
|
||||
score = 0;
|
||||
token = tokens.nextElement();
|
||||
|
||||
// check if the token appears in the text
|
||||
if (words.containsKey(token.toString())) {
|
||||
final Word word = words.get(token.toString());
|
||||
// token appears in text and matches an existing bookmark tag
|
||||
if (tags.containsKey(token.toString())) {
|
||||
score = word.occurrences() * tags.get(token.toString()).size() * 200;
|
||||
}
|
||||
// token appears in text and has more than 3 characters
|
||||
else if (token.length()>3) {
|
||||
score = word.occurrences() * 100;
|
||||
}
|
||||
// if token is already part of a phrase, reduce score
|
||||
if(pwords.toString().indexOf(token.toString())>1) {
|
||||
score = score / 3;
|
||||
}
|
||||
topwords.add(new YMarkTag(token.toString(), score));
|
||||
}
|
||||
if(phrases.get(phrase).size() > 2 && buffer.indexOf(phrase) > 1) {
|
||||
score = score * 10;
|
||||
}
|
||||
score = 0;
|
||||
buffer.setLength(0);
|
||||
for(final YMarkTag tag : topwords) {
|
||||
if(score < max) {
|
||||
if(tag.size() > 100) {
|
||||
buffer.append(tag.name());
|
||||
buffer.append(YMarkUtil.TAGS_SEPARATOR);
|
||||
score++;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
if (tags.containsKey(phrase)) {
|
||||
score = score * 20;
|
||||
}
|
||||
final String clean = YMarkUtil.cleanTagsString(buffer.toString());
|
||||
if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
|
||||
return MultiProtocolURL.getFileExtension(document.dc_source().getFileName());
|
||||
}
|
||||
return clean;
|
||||
} finally {
|
||||
tokens.close();
|
||||
topwords.add(new YMarkTag(phrase, score));
|
||||
pwords.append(phrase);
|
||||
pwords.append(' ');
|
||||
}
|
||||
|
||||
// loop through potential tag and rank them
|
||||
WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
|
||||
try {
|
||||
while (tokens.hasMoreElements()) {
|
||||
score = 0;
|
||||
token = tokens.nextElement();
|
||||
|
||||
// check if the token appears in the text
|
||||
if (words.containsKey(token.toString())) {
|
||||
final Word word = words.get(token.toString());
|
||||
// token appears in text and matches an existing bookmark tag
|
||||
if (tags.containsKey(token.toString())) {
|
||||
score = word.occurrences() * tags.get(token.toString()).size() * 200;
|
||||
}
|
||||
// token appears in text and has more than 3 characters
|
||||
else if (token.length()>3) {
|
||||
score = word.occurrences() * 100;
|
||||
}
|
||||
// if token is already part of a phrase, reduce score
|
||||
if(pwords.toString().indexOf(token.toString())>1) {
|
||||
score = score / 3;
|
||||
}
|
||||
topwords.add(new YMarkTag(token.toString(), score));
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
tokens.close();
|
||||
tokens = null;
|
||||
}
|
||||
score = 0;
|
||||
buffer.setLength(0);
|
||||
for(final YMarkTag tag : topwords) {
|
||||
if(score < max) {
|
||||
if(tag.size() > 100) {
|
||||
buffer.append(tag.name());
|
||||
buffer.append(YMarkUtil.TAGS_SEPARATOR);
|
||||
score++;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
final String clean = YMarkUtil.cleanTagsString(buffer.toString());
|
||||
if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
|
||||
return MultiProtocolURL.getFileExtension(document.dc_source().getFileName());
|
||||
}
|
||||
return clean;
|
||||
}
|
||||
|
||||
private static TreeMap<String, YMarkTag> getPhrases(final Document document, final int size) {
|
||||
final TreeMap<String, YMarkTag> phrases = new TreeMap<String, YMarkTag>();
|
||||
final StringBuilder phrase = new StringBuilder(128);
|
||||
final WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
|
||||
WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
|
||||
try {
|
||||
StringBuilder token;
|
||||
int count = 0;
|
||||
|
@ -206,6 +207,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
|
|||
return phrases;
|
||||
} finally {
|
||||
tokens.close();
|
||||
tokens = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -285,6 +285,7 @@ public final class Condenser {
|
|||
}
|
||||
} finally {
|
||||
wordenum.close();
|
||||
wordenum = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -345,7 +346,7 @@ public final class Condenser {
|
|||
if (LibraryProvider.autotagging.isEmpty()) doAutotagging = false;
|
||||
|
||||
// read source
|
||||
final WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
|
||||
WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
|
||||
try {
|
||||
while (wordenum.hasMoreElements()) {
|
||||
word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);
|
||||
|
@ -420,6 +421,7 @@ public final class Condenser {
|
|||
}
|
||||
} finally {
|
||||
wordenum.close();
|
||||
wordenum = null;
|
||||
}
|
||||
|
||||
if (pseudostemming) {
|
||||
|
|
|
@ -53,25 +53,12 @@ public class SentenceReader implements Iterator<StringBuilder>, Iterable<StringB
|
|||
}
|
||||
|
||||
private StringBuilder nextElement0() {
|
||||
final StringBuilder s = readSentence();
|
||||
//System.out.println(" SENTENCE='" + s + "'"); // DEBUG
|
||||
if (s == null) return null;
|
||||
return s;
|
||||
}
|
||||
|
||||
private StringBuilder readSentence() {
|
||||
final StringBuilder s = new StringBuilder(80);
|
||||
int nextChar;
|
||||
char c, lc = ' '; // starting with ' ' as last character prevents that the result string starts with a ' '
|
||||
|
||||
// find sentence end
|
||||
while (true) {
|
||||
if (this.pos >= this.text.length()) break;
|
||||
nextChar = this.text.charAt(this.pos++);
|
||||
//System.out.print((char) nextChar); // DEBUG
|
||||
if (nextChar < 0) {
|
||||
break;
|
||||
}
|
||||
while (this.pos < this.text.length() && (nextChar = this.text.charAt(this.pos++)) > 0) {
|
||||
c = (char) nextChar;
|
||||
if (this.pre && (nextChar == 10 || nextChar == 13)) break;
|
||||
if (c < ' ') c = ' ';
|
||||
|
|
|
@ -39,7 +39,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
|||
// this enumeration removes all words that contain either wrong characters or are too short
|
||||
|
||||
private StringBuilder buffer = null;
|
||||
private final unsievedWordsEnum e;
|
||||
private unsievedWordsEnum e;
|
||||
private final WordCache meaningLib;
|
||||
|
||||
public WordTokenizer(final SentenceReader sr, final WordCache meaningLib) {
|
||||
|
@ -82,13 +82,15 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
|||
|
||||
public synchronized void close() {
|
||||
this.e.close();
|
||||
this.e = null;
|
||||
this.buffer = null;
|
||||
}
|
||||
|
||||
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
|
||||
// returns an enumeration of StringBuilder Objects
|
||||
private StringBuilder buffer = null;
|
||||
private final SentenceReader sr;
|
||||
private final List<StringBuilder> s;
|
||||
private SentenceReader sr;
|
||||
private List<StringBuilder> s;
|
||||
private int sIndex;
|
||||
|
||||
public unsievedWordsEnum(final SentenceReader sr0) {
|
||||
|
@ -152,7 +154,11 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
|||
}
|
||||
|
||||
public synchronized void close() {
|
||||
this.sIndex = 0;
|
||||
this.s.clear();
|
||||
this.s = null;
|
||||
this.sr.close();
|
||||
this.sr = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -181,7 +187,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
|||
*/
|
||||
public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) {
|
||||
final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
|
||||
final WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
|
||||
WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
|
||||
try {
|
||||
int pos = 0;
|
||||
StringBuilder word;
|
||||
|
@ -202,6 +208,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
|
|||
return map;
|
||||
} finally {
|
||||
words.close();
|
||||
words = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -902,10 +902,12 @@ public final class Protocol {
|
|||
Map<String, String> resultMap = null;
|
||||
String key = "";
|
||||
final ContentBody keyBody = parts.get("key");
|
||||
if ( keyBody != null ) {
|
||||
final ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
|
||||
if (keyBody != null) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
|
||||
keyBody.writeTo(baos);
|
||||
key = baos.toString();
|
||||
key = UTF8.String(baos.toByteArray());
|
||||
baos.close();
|
||||
baos = null;
|
||||
}
|
||||
|
||||
String filter = event.query.urlMask.pattern().toString();
|
||||
|
@ -1037,67 +1039,70 @@ public final class Protocol {
|
|||
RemoteInstance instance = null;
|
||||
SolrConnector solrConnector = null;
|
||||
SolrDocumentList docList = null;
|
||||
QueryResponse rsp = null;
|
||||
if (localsearch) {
|
||||
// search the local index
|
||||
try {
|
||||
rsp = event.getQuery().getSegment().fulltext().getDefaultConnector().getResponseByParams(solrQuery);
|
||||
docList = rsp.getResults();
|
||||
} catch (final Throwable e) {
|
||||
Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
String address = target == event.peers.mySeed() ? "localhost:" + target.getPort() : target.getPublicAddress();
|
||||
final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 10000);
|
||||
instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
|
||||
solrConnector = new RemoteSolrConnector(instance, "solr");
|
||||
rsp = solrConnector.getResponseByParams(solrQuery);
|
||||
docList = rsp.getResults();
|
||||
solrConnector.close();
|
||||
instance.close();
|
||||
// no need to close this here because that sends a commit to remote solr which is not wanted here
|
||||
} catch (final Throwable e) {
|
||||
Network.log.info("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// evaluate facets
|
||||
Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
|
||||
for (String field: event.query.facetfields) {
|
||||
FacetField facet = rsp.getFacetField(field);
|
||||
ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
List<Count> values = facet == null ? null : facet.getValues();
|
||||
if (values == null) continue;
|
||||
for (Count ff: values) {
|
||||
int c = (int) ff.getCount();
|
||||
if (c == 0) continue;
|
||||
result.set(ff.getName(), c);
|
||||
}
|
||||
if (result.size() > 0) facets.put(field, result);
|
||||
}
|
||||
|
||||
// evaluate snippets
|
||||
Map<String, Map<String, List<String>>> rawsnippets = rsp.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
|
||||
Map<String, String> snippets = new HashMap<String, String>(); // this will be a list of urlhash-snippet entries
|
||||
if (rawsnippets != null) {
|
||||
nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re: rawsnippets.entrySet()) {
|
||||
Map<String, List<String>> rs = re.getValue();
|
||||
for (CollectionSchema field: snippetFields) {
|
||||
if (rs.containsKey(field.getSolrFieldName())) {
|
||||
List<String> s = rs.get(field.getSolrFieldName());
|
||||
if (s.size() > 0) {
|
||||
snippets.put(re.getKey(), s.get(0));
|
||||
continue nextsnippet;
|
||||
{// encapsulate expensive solr QueryResponse object
|
||||
QueryResponse rsp = null;
|
||||
if (localsearch) {
|
||||
// search the local index
|
||||
try {
|
||||
rsp = event.getQuery().getSegment().fulltext().getDefaultConnector().getResponseByParams(solrQuery);
|
||||
docList = rsp.getResults();
|
||||
} catch (final Throwable e) {
|
||||
Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
String address = target == event.peers.mySeed() ? "localhost:" + target.getPort() : target.getPublicAddress();
|
||||
final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000);
|
||||
instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
|
||||
solrConnector = new RemoteSolrConnector(instance, "solr");
|
||||
rsp = solrConnector.getResponseByParams(solrQuery);
|
||||
docList = rsp.getResults();
|
||||
solrConnector.close();
|
||||
instance.close();
|
||||
// no need to close this here because that sends a commit to remote solr which is not wanted here
|
||||
} catch (final Throwable e) {
|
||||
Network.log.info("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// evaluate facets
|
||||
for (String field: event.query.facetfields) {
|
||||
FacetField facet = rsp.getFacetField(field);
|
||||
ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
|
||||
List<Count> values = facet == null ? null : facet.getValues();
|
||||
if (values == null) continue;
|
||||
for (Count ff: values) {
|
||||
int c = (int) ff.getCount();
|
||||
if (c == 0) continue;
|
||||
result.set(ff.getName(), c);
|
||||
}
|
||||
if (result.size() > 0) facets.put(field, result);
|
||||
}
|
||||
|
||||
// evaluate snippets
|
||||
Map<String, Map<String, List<String>>> rawsnippets = rsp.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
|
||||
if (rawsnippets != null) {
|
||||
nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re: rawsnippets.entrySet()) {
|
||||
Map<String, List<String>> rs = re.getValue();
|
||||
for (CollectionSchema field: snippetFields) {
|
||||
if (rs.containsKey(field.getSolrFieldName())) {
|
||||
List<String> s = rs.get(field.getSolrFieldName());
|
||||
if (s.size() > 0) {
|
||||
snippets.put(re.getKey(), s.get(0));
|
||||
continue nextsnippet;
|
||||
}
|
||||
}
|
||||
}
|
||||
// no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
|
||||
}
|
||||
// no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
|
||||
}
|
||||
rsp = null;
|
||||
}
|
||||
|
||||
|
||||
// evaluate result
|
||||
List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
|
||||
if (docList == null || docList.size() == 0) {
|
||||
|
@ -1164,24 +1169,25 @@ public final class Protocol {
|
|||
// add the url entry to the word indexes
|
||||
container.add(urlEntry);
|
||||
}
|
||||
final int dls = docList.size();
|
||||
final int numFound = (int) docList.getNumFound();
|
||||
docList.clear();
|
||||
docList = null;
|
||||
if (localsearch) {
|
||||
event.addNodes(container, facets, snippets, true, "localpeer", (int) docList.getNumFound());
|
||||
event.addNodes(container, facets, snippets, true, "localpeer", numFound);
|
||||
event.addFinalize();
|
||||
event.addExpectedRemoteReferences(-count);
|
||||
Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + docList.getNumFound() + " references");
|
||||
Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
|
||||
} else {
|
||||
for (SolrInputDocument doc: docs) {
|
||||
event.query.getSegment().putDocumentInQueue(doc);
|
||||
}
|
||||
docs.clear(); docs = null;
|
||||
event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, (int) docList.getNumFound());
|
||||
event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
|
||||
event.addFinalize();
|
||||
event.addExpectedRemoteReferences(-count);
|
||||
Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + docList.getNumFound() + " references");
|
||||
Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references");
|
||||
}
|
||||
final int dls = docList.size();
|
||||
docList.clear();
|
||||
docList = null;
|
||||
if (solrConnector != null) solrConnector.close();
|
||||
if (instance != null) instance.close();
|
||||
return dls;
|
||||
|
|
|
@ -191,14 +191,17 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
|||
// we did not find everything in the metadata, look further into the document itself.
|
||||
|
||||
// first acquire the sentences:
|
||||
final String solrText = row.getText();
|
||||
String solrText = row.getText();
|
||||
if (solrText != null) {
|
||||
// compute sentences from solr query
|
||||
final SentenceReader sr = new SentenceReader(solrText, pre);
|
||||
SentenceReader sr = new SentenceReader(solrText, pre);
|
||||
sentences = new ArrayList<StringBuilder>();
|
||||
while (sr.hasNext()) {
|
||||
sentences.add(sr.next());
|
||||
}
|
||||
sr.close();
|
||||
sr = null;
|
||||
solrText = null;
|
||||
} else if (net.yacy.crawler.data.Cache.has(url.hash())) {
|
||||
// get the sentences from the cache
|
||||
final Request request = loader == null ? null : loader.request(url, true, reindexing);
|
||||
|
@ -213,6 +216,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
|||
try {
|
||||
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
|
||||
sentences = document.getSentences(pre);
|
||||
response = null;
|
||||
document = null;
|
||||
} catch (final Parser.Failure e) {
|
||||
}
|
||||
}
|
||||
|
@ -254,6 +259,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
|||
init(url.hash(), textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null);
|
||||
return;
|
||||
}
|
||||
sentences = null; // we don't need this here any more
|
||||
|
||||
// try to load the resource from the cache
|
||||
Response response = null;
|
||||
|
@ -311,6 +317,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
|||
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage());
|
||||
return;
|
||||
}
|
||||
sentences = null;
|
||||
} //encapsulate potential expensive sentences END
|
||||
|
||||
// compute snippet from media - attention document closed above!
|
||||
|
|
Loading…
Reference in New Issue
Block a user