hacks to prevent storage of data longer than necessary during search and
some speed enhancements. This should reduce the memory usage during
heavy-load search a bit.
Michael Peter Christen 2013-10-25 15:05:30 +02:00
parent 3c3cb78555
commit 9bb7eab389
9 changed files with 183 additions and 171 deletions

View File

@@ -1095,7 +1095,7 @@ federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
 federated.service.solr.indexing.sharding = MODULO_HOST_MD5
 # the lazy attribute causes that fields containing "" or 0 are not added and not written
 federated.service.solr.indexing.lazy = true
-federated.service.solr.indexing.timeout = 10000
+federated.service.solr.indexing.timeout = 6000
 
 # temporary definition of backend services to use.
 # After the migration a rwi+solr combination is used, the solr contains the content of the previously used metadata-db.
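
The same 6000 ms value is also hard-coded as the fallback where this setting is read; the Protocol hunk further down keeps the two defaults in sync. Roughly how the value is consumed (a fragment; all names are taken from the diff below):

    final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(
            SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000); // fallback now matches this default
    RemoteInstance instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout);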

View File

@@ -31,7 +31,6 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.Map;
 import net.yacy.cora.document.encoding.ASCII;
@@ -290,17 +289,21 @@ public class ViewFile {
                     // Search word highlighting
                     for (final StringBuilder s: sentences) {
                         sentence = s.toString();
-                        Enumeration<StringBuilder> tokens = null;
-                        tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
-                        while (tokens.hasMoreElements()) {
-                            token = tokens.nextElement();
-                            if (token.length() > 0) {
-                                prop.put("viewMode_words_" + i + "_nr", i + 1);
-                                prop.put("viewMode_words_" + i + "_word", token.toString());
-                                prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
-                                dark = !dark;
-                                i++;
-                            }
-                        }
+                        WordTokenizer tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
+                        try {
+                            while (tokens.hasMoreElements()) {
+                                token = tokens.nextElement();
+                                if (token.length() > 0) {
+                                    prop.put("viewMode_words_" + i + "_nr", i + 1);
+                                    prop.put("viewMode_words_" + i + "_word", token.toString());
+                                    prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
+                                    dark = !dark;
+                                    i++;
+                                }
+                            }
+                        } finally {
+                            tokens.close();
+                            tokens = null;
+                        }
                     }
                 }
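
This is the shape that recurs throughout the commit: construct the tokenizer, consume it, then close it in a finally block (and null the local) so its sentence buffers can be reclaimed even if the loop throws. A self-contained sketch of the pattern, with a hypothetical CloseableTokens interface standing in for WordTokenizer:

    import java.util.Enumeration;

    final class TokenDrain {
        // stand-in for WordTokenizer: an enumeration that pins heavy buffers until closed
        interface CloseableTokens extends Enumeration<StringBuilder> {
            void close();
        }

        static void drain(CloseableTokens tokens) {
            try {
                while (tokens.hasMoreElements()) {
                    final StringBuilder token = tokens.nextElement();
                    if (token.length() > 0) System.out.println(token); // stand-in for the highlighting work
                }
            } finally {
                tokens.close(); // always release the underlying reader and buffers
            }
        }
    }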

View File

@@ -28,14 +28,15 @@ import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.Queue;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingDeque;
 import java.util.concurrent.LinkedBlockingQueue;
 
 import net.yacy.cora.document.encoding.UTF8;
@@ -48,7 +49,7 @@ import net.yacy.cora.util.Memory;
 
 public class Digest {
 
-    public static BlockingQueue<MessageDigest> digestPool = new LinkedBlockingDeque<MessageDigest>();
+    public static Queue<MessageDigest> digestPool = new ConcurrentLinkedQueue<MessageDigest>();
 
     private static final int md5CacheSize = Math.max(1000, Math.min(1000000, (int) (Memory.available() / 50000L)));
     private static ARC<String, byte[]> md5Cache = null;
@@ -138,11 +139,8 @@ public class Digest {
         digest.update(keyBytes);
         final byte[] result = digest.digest();
         digest.reset(); // to be prepared for next
-        try {
-            digestPool.put(digest);
-            //System.out.println("Digest Pool size = " + digestPool.size());
-        } catch (final InterruptedException e ) {
-        }
+        digestPool.add(digest);
+        //System.out.println("Digest Pool size = " + digestPool.size());
 
         // update the cache
         md5Cache.insertIfAbsent(key, result); // prevent expensive MD5 computation and encoding
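
Swapping LinkedBlockingDeque.put() for ConcurrentLinkedQueue.add() removes both the InterruptedException ceremony and any possibility of blocking: add() on an unbounded lock-free queue always succeeds immediately. A self-contained sketch of the pool pattern (hypothetical class name):

    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import java.util.Queue;
    import java.util.concurrent.ConcurrentLinkedQueue;

    final class Md5Pool {
        private static final Queue<MessageDigest> pool = new ConcurrentLinkedQueue<MessageDigest>();

        static byte[] md5(final byte[] data) throws NoSuchAlgorithmException {
            MessageDigest digest = pool.poll();               // lock-free take; null when the pool is empty
            if (digest == null) digest = MessageDigest.getInstance("MD5");
            final byte[] result = digest.digest(data);        // digest() also resets the instance for reuse
            pool.add(digest);                                 // lock-free return; never blocks, never interrupts
            return result;
        }
    }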

View File

@@ -95,84 +95,85 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
         buffer.append(document.dc_title().toLowerCase());
         for (String s:document.dc_description()) buffer.append(s.toLowerCase());
         buffer.append(document.dc_subject(' ').toLowerCase());
-        final WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
-        try {
-            int score = 0;
+        int score = 0;
         // get phrases
         final TreeMap<String, YMarkTag> phrases = getPhrases(document, 2);
         phrases.putAll(getPhrases(document, 3));
         final Iterator<String> iter = phrases.keySet().iterator();
         while(iter.hasNext()) {
             score = 10;
             final String phrase = iter.next();
             if(phrases.get(phrase).size() > 3 && phrases.get(phrase).size() < 10) {
                 score = phrases.get(phrase).size() * phrase.split(" ").length * 20;
             }
             if(isDigitSpace(phrase)) {
                 score = 10;
             }
             if(phrases.get(phrase).size() > 2 && buffer.indexOf(phrase) > 1) {
                 score = score * 10;
             }
             if (tags.containsKey(phrase)) {
                 score = score * 20;
             }
             topwords.add(new YMarkTag(phrase, score));
             pwords.append(phrase);
             pwords.append(' ');
         }
         // loop through potential tag and rank them
+        WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
+        try {
             while (tokens.hasMoreElements()) {
                 score = 0;
                 token = tokens.nextElement();
                 // check if the token appears in the text
                 if (words.containsKey(token.toString())) {
                     final Word word = words.get(token.toString());
                     // token appears in text and matches an existing bookmark tag
                     if (tags.containsKey(token.toString())) {
                         score = word.occurrences() * tags.get(token.toString()).size() * 200;
                     }
                     // token appears in text and has more than 3 characters
                     else if (token.length()>3) {
                         score = word.occurrences() * 100;
                     }
                     // if token is already part of a phrase, reduce score
                     if(pwords.toString().indexOf(token.toString())>1) {
                         score = score / 3;
                     }
                     topwords.add(new YMarkTag(token.toString(), score));
                 }
             }
+        } finally {
+            tokens.close();
+            tokens = null;
+        }
         score = 0;
         buffer.setLength(0);
         for(final YMarkTag tag : topwords) {
             if(score < max) {
                 if(tag.size() > 100) {
                     buffer.append(tag.name());
                     buffer.append(YMarkUtil.TAGS_SEPARATOR);
                     score++;
                 }
             } else {
                 break;
             }
         }
         final String clean = YMarkUtil.cleanTagsString(buffer.toString());
         if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
             return MultiProtocolURL.getFileExtension(document.dc_source().getFileName());
         }
         return clean;
-        } finally {
-            tokens.close();
-        }
     }
 
     private static TreeMap<String, YMarkTag> getPhrases(final Document document, final int size) {
         final TreeMap<String, YMarkTag> phrases = new TreeMap<String, YMarkTag>();
         final StringBuilder phrase = new StringBuilder(128);
-        final WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
+        WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
         try {
             StringBuilder token;
             int count = 0;

@@ -206,6 +207,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandle
             return phrases;
         } finally {
             tokens.close();
+            tokens = null;
         }
     }
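
The phrase-scoring rules themselves are unchanged by the move; restated in isolation they are easier to check. A self-contained, hypothetical restatement with the constants copied from the code above:

    public final class PhraseScoreDemo {
        // occurrences: phrase frequency; words: tokens in the phrase;
        // digitSpace: phrase is only digits/spaces; inText: phrase also occurs in
        // the title/description buffer; knownTag: phrase matches an existing bookmark tag
        static int phraseScore(int occurrences, int words, boolean digitSpace, boolean inText, boolean knownTag) {
            int score = 10;                                              // base score
            if (occurrences > 3 && occurrences < 10) score = occurrences * words * 20;
            if (digitSpace) score = 10;                                  // demote number-only phrases
            if (occurrences > 2 && inText) score = score * 10;
            if (knownTag) score = score * 20;
            return score;
        }

        public static void main(String[] args) {
            // a two-word phrase seen 4 times, present in the text, not yet a tag:
            System.out.println(phraseScore(4, 2, false, true, false));  // 4*2*20 = 160, then *10 = 1600
        }
    }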

View File

@@ -285,6 +285,7 @@ public final class Condenser {
                 }
             } finally {
                 wordenum.close();
+                wordenum = null;
             }
         }

@@ -345,7 +346,7 @@ public final class Condenser {
         if (LibraryProvider.autotagging.isEmpty()) doAutotagging = false;
 
         // read source
-        final WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
+        WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
         try {
             while (wordenum.hasMoreElements()) {
                 word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);

@@ -420,6 +421,7 @@ public final class Condenser {
             }
         } finally {
             wordenum.close();
+            wordenum = null;
         }
 
         if (pseudostemming) {

View File

@@ -53,25 +53,12 @@ public class SentenceReader implements Iterator<StringBuilder>, Iterable<StringB
     }
 
     private StringBuilder nextElement0() {
-        final StringBuilder s = readSentence();
-        //System.out.println(" SENTENCE='" + s + "'"); // DEBUG
-        if (s == null) return null;
-        return s;
-    }
-
-    private StringBuilder readSentence() {
         final StringBuilder s = new StringBuilder(80);
         int nextChar;
         char c, lc = ' '; // starting with ' ' as last character prevents that the result string starts with a ' '
 
         // find sentence end
-        while (true) {
-            if (this.pos >= this.text.length()) break;
-            nextChar = this.text.charAt(this.pos++);
-            //System.out.print((char) nextChar); // DEBUG
-            if (nextChar < 0) {
-                break;
-            }
+        while (this.pos < this.text.length() && (nextChar = this.text.charAt(this.pos++)) > 0) {
             c = (char) nextChar;
             if (this.pre && (nextChar == 10 || nextChar == 13)) break;
             if (c < ' ') c = ' ';
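
The separate readSentence() helper and its dead `nextChar < 0` guard (charAt() returns an unsigned char, so the value is never negative) fold into a single loop condition that checks bounds and fetches the character in one step. A self-contained illustration (hypothetical demo class):

    public final class ReadLoopDemo {
        public static void main(String[] args) {
            final String text = "first sentence. second sentence.";
            int pos = 0;
            int nextChar;
            final StringBuilder s = new StringBuilder(80);
            // bounds check and fetch share one condition; "> 0" can only stop at a NUL character
            while (pos < text.length() && (nextChar = text.charAt(pos++)) > 0) {
                char c = (char) nextChar;
                if (c < ' ') c = ' '; // normalize control characters to spaces
                s.append(c);
            }
            System.out.println(s);
        }
    }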

View File

@@ -39,7 +39,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
     // this enumeration removes all words that contain either wrong characters or are too short
 
     private StringBuilder buffer = null;
-    private final unsievedWordsEnum e;
+    private unsievedWordsEnum e;
     private final WordCache meaningLib;
 
     public WordTokenizer(final SentenceReader sr, final WordCache meaningLib) {

@@ -82,13 +82,15 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
 
     public synchronized void close() {
         this.e.close();
+        this.e = null;
+        this.buffer = null;
     }
 
     private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
         // returns an enumeration of StringBuilder Objects
         private StringBuilder buffer = null;
-        private final SentenceReader sr;
-        private final List<StringBuilder> s;
+        private SentenceReader sr;
+        private List<StringBuilder> s;
         private int sIndex;
 
         public unsievedWordsEnum(final SentenceReader sr0) {

@@ -152,7 +154,11 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
         }
 
         public synchronized void close() {
+            this.sIndex = 0;
+            this.s.clear();
+            this.s = null;
             this.sr.close();
+            this.sr = null;
         }
     }

@@ -181,7 +187,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
      */
    public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) {
         final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
-        final WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
+        WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
         try {
             int pos = 0;
             StringBuilder word;

@@ -202,6 +208,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
             return map;
         } finally {
             words.close();
+            words = null;
         }
     }
 }
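
Dropping `final` on these fields is what lets close() null them out: a tokenizer that has been closed but is still referenced somewhere no longer pins its token list or reader. A minimal sketch of the idea (hypothetical class):

    import java.util.ArrayList;
    import java.util.List;

    final class ClosableTokenState {
        private List<StringBuilder> s = new ArrayList<StringBuilder>(); // non-final so close() can release it
        private StringBuilder buffer = null;
        private int sIndex = 0;

        synchronized void close() {
            this.sIndex = 0;
            if (this.s != null) { this.s.clear(); this.s = null; } // token list becomes collectible
            this.buffer = null;                                    // current word buffer, too
        }
    }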

View File

@@ -902,10 +902,12 @@ public final class Protocol {
         Map<String, String> resultMap = null;
         String key = "";
         final ContentBody keyBody = parts.get("key");
-        if ( keyBody != null ) {
-            final ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
+        if (keyBody != null) {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
             keyBody.writeTo(baos);
-            key = baos.toString();
+            key = UTF8.String(baos.toByteArray());
+            baos.close();
+            baos = null;
         }
 
         String filter = event.query.urlMask.pattern().toString();
@@ -1037,67 +1039,70 @@ public final class Protocol {
         RemoteInstance instance = null;
         SolrConnector solrConnector = null;
         SolrDocumentList docList = null;
-        QueryResponse rsp = null;
+        Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
+        Map<String, String> snippets = new HashMap<String, String>(); // this will be a list of urlhash-snippet entries
+        {// encapsulate expensive solr QueryResponse object
+            QueryResponse rsp = null;
             if (localsearch) {
                 // search the local index
                 try {
                     rsp = event.getQuery().getSegment().fulltext().getDefaultConnector().getResponseByParams(solrQuery);
                     docList = rsp.getResults();
                 } catch (final Throwable e) {
                     Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
                     return -1;
                 }
             } else {
                 try {
                     String address = target == event.peers.mySeed() ? "localhost:" + target.getPort() : target.getPublicAddress();
-                    final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 10000);
+                    final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000);
                     instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
                     solrConnector = new RemoteSolrConnector(instance, "solr");
                     rsp = solrConnector.getResponseByParams(solrQuery);
                     docList = rsp.getResults();
                     solrConnector.close();
                     instance.close();
                     // no need to close this here because that sends a commit to remote solr which is not wanted here
                 } catch (final Throwable e) {
                     Network.log.info("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
                     return -1;
                 }
             }
 
             // evaluate facets
-        Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
             for (String field: event.query.facetfields) {
                 FacetField facet = rsp.getFacetField(field);
                 ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
                 List<Count> values = facet == null ? null : facet.getValues();
                 if (values == null) continue;
                 for (Count ff: values) {
                     int c = (int) ff.getCount();
                     if (c == 0) continue;
                     result.set(ff.getName(), c);
                 }
                 if (result.size() > 0) facets.put(field, result);
             }
 
             // evaluate snippets
             Map<String, Map<String, List<String>>> rawsnippets = rsp.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
-        Map<String, String> snippets = new HashMap<String, String>(); // this will be a list of urlhash-snippet entries
             if (rawsnippets != null) {
                 nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re: rawsnippets.entrySet()) {
                     Map<String, List<String>> rs = re.getValue();
                     for (CollectionSchema field: snippetFields) {
                         if (rs.containsKey(field.getSolrFieldName())) {
                             List<String> s = rs.get(field.getSolrFieldName());
                             if (s.size() > 0) {
                                 snippets.put(re.getKey(), s.get(0));
                                 continue nextsnippet;
                             }
                         }
                     }
                     // no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
                 }
             }
+            rsp = null;
+        }
 
         // evaluate result
         List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
         if (docList == null || docList.size() == 0) {
@@ -1164,24 +1169,25 @@ public final class Protocol {
             // add the url entry to the word indexes
             container.add(urlEntry);
         }
+        final int dls = docList.size();
+        final int numFound = (int) docList.getNumFound();
+        docList.clear();
+        docList = null;
         if (localsearch) {
-            event.addNodes(container, facets, snippets, true, "localpeer", (int) docList.getNumFound());
+            event.addNodes(container, facets, snippets, true, "localpeer", numFound);
             event.addFinalize();
             event.addExpectedRemoteReferences(-count);
-            Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + docList.getNumFound() + " references");
+            Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
         } else {
             for (SolrInputDocument doc: docs) {
                 event.query.getSegment().putDocumentInQueue(doc);
             }
             docs.clear(); docs = null;
-            event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, (int) docList.getNumFound());
+            event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
             event.addFinalize();
             event.addExpectedRemoteReferences(-count);
-            Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + docList.getNumFound() + " references");
+            Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references");
         }
-        final int dls = docList.size();
-        docList.clear();
-        docList = null;
         if (solrConnector != null) solrConnector.close();
         if (instance != null) instance.close();
         return dls;
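
Two separate hacks are combined in this file: the QueryResponse is confined to an inner block so the whole Solr response tree becomes unreachable once facets and snippets have been extracted, and the counters are copied out of the SolrDocumentList so it can be cleared before event.addNodes() runs rather than after. A condensed fragment sketch, using the SolrJ types and the solrConnector/solrQuery names from the diff, with error handling elided:

    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocumentList;

    final int dls;
    final int numFound;
    { // encapsulate the expensive QueryResponse: only primitives escape this block
        QueryResponse rsp = solrConnector.getResponseByParams(solrQuery);
        SolrDocumentList docList = rsp.getResults();
        dls = docList.size();                    // capture before clearing
        numFound = (int) docList.getNumFound();
        docList.clear();                         // release the documents eagerly
    } // rsp and docList go out of scope here and can be collected
    // dls and numFound remain available for logging and accounting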

View File

@@ -191,14 +191,17 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
             // we did not find everything in the metadata, look further into the document itself.
 
             // first acquire the sentences:
-            final String solrText = row.getText();
+            String solrText = row.getText();
             if (solrText != null) {
                 // compute sentences from solr query
-                final SentenceReader sr = new SentenceReader(solrText, pre);
+                SentenceReader sr = new SentenceReader(solrText, pre);
                 sentences = new ArrayList<StringBuilder>();
                 while (sr.hasNext()) {
                     sentences.add(sr.next());
                 }
+                sr.close();
+                sr = null;
+                solrText = null;
             } else if (net.yacy.crawler.data.Cache.has(url.hash())) {
                 // get the sentences from the cache
                 final Request request = loader == null ? null : loader.request(url, true, reindexing);

@@ -213,6 +216,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
                 try {
                     document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                     sentences = document.getSentences(pre);
+                    response = null;
+                    document = null;
                 } catch (final Parser.Failure e) {
                 }
             }

@@ -254,6 +259,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
                 init(url.hash(), textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null);
                 return;
             }
+            sentences = null; // we don't need this here any more
 
             // try to load the resource from the cache
             Response response = null;

@@ -311,6 +317,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
                 init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage());
                 return;
             }
+            sentences = null;
         } //encapsulate potential expensive sentences END
 
         // compute snippet from media - attention document closed above!