hacks to prevent storage of data longer than necessary during search, and some speed enhancements. This should reduce the memory usage during heavy-load search a bit.
Michael Peter Christen 2013-10-25 15:05:30 +02:00
parent 3c3cb78555
commit 9bb7eab389
9 changed files with 183 additions and 171 deletions
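
All nine files apply the same object-lifecycle pattern: word tokenizers are closed in a finally block and their references nulled as soon as iteration ends, and scalars are copied out of large result objects (such as the Solr docList) so those objects can be cleared before the rest of the method runs. A minimal sketch of that pattern, with java.util.Scanner standing in for YaCy's WordTokenizer and a plain List standing in for the Solr result list (both stand-ins and the class name are illustrative, not from the patch):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Scanner;

    public final class EarlyReleaseSketch {
        public static void main(final String[] args) {
            // close-in-finally plus null-out, as applied to WordTokenizer in
            // ViewFile, YMarkAutoTagger, Condenser and WordTokenizer.hashSentence
            List<String> words = new ArrayList<String>();
            Scanner tokenizer = new Scanner("reduce memory usage during heavy-load search"); // not final, so the reference can be dropped
            try {
                while (tokenizer.hasNext()) words.add(tokenizer.next());
            } finally {
                tokenizer.close();
                tokenizer = null; // the tokenizer becomes collectable while the method keeps running
            }
            // copy out the scalars that are still needed, then clear and drop the
            // large container early, as done with docList.size()/getNumFound() in Protocol
            final int numFound = words.size();
            words.clear();
            words = null;
            System.out.println("found " + numFound + " words");
        }
    }

Whether nulling a local reference actually frees anything earlier depends on the JIT's liveness analysis; the early close() and the clear() on a large, still-referenced collection are the more dependable parts of the pattern.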

View File

@@ -1095,7 +1095,7 @@ federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
 federated.service.solr.indexing.sharding = MODULO_HOST_MD5
 # the lazy attribute causes that fields containing "" or 0 are not added and not written
 federated.service.solr.indexing.lazy = true
-federated.service.solr.indexing.timeout = 10000
+federated.service.solr.indexing.timeout = 6000
 # temporary definition of backend services to use.
 # After the migration a rwi+solr combination is used, the solr contains the content of the previously used metadata-db.

View File

@@ -31,7 +31,6 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.Map;
 import net.yacy.cora.document.encoding.ASCII;
@@ -290,17 +289,21 @@ public class ViewFile {
         // Search word highlighting
         for (final StringBuilder s: sentences) {
             sentence = s.toString();
-            Enumeration<StringBuilder> tokens = null;
-            tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
-            while (tokens.hasMoreElements()) {
-                token = tokens.nextElement();
-                if (token.length() > 0) {
-                    prop.put("viewMode_words_" + i + "_nr", i + 1);
-                    prop.put("viewMode_words_" + i + "_word", token.toString());
-                    prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
-                    dark = !dark;
-                    i++;
-                }
-            }
+            WordTokenizer tokens = new WordTokenizer(new SentenceReader(sentence), LibraryProvider.dymLib);
+            try {
+                while (tokens.hasMoreElements()) {
+                    token = tokens.nextElement();
+                    if (token.length() > 0) {
+                        prop.put("viewMode_words_" + i + "_nr", i + 1);
+                        prop.put("viewMode_words_" + i + "_word", token.toString());
+                        prop.put("viewMode_words_" + i + "_dark", dark ? "1" : "0");
+                        dark = !dark;
+                        i++;
+                    }
+                }
+            } finally {
+                tokens.close();
+                tokens = null;
+            }
         }
     }

View File

@@ -28,14 +28,15 @@ import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.Queue;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingDeque;
 import java.util.concurrent.LinkedBlockingQueue;
 import net.yacy.cora.document.encoding.UTF8;
@@ -48,7 +49,7 @@ import net.yacy.cora.util.Memory;
 public class Digest {
-    public static BlockingQueue<MessageDigest> digestPool = new LinkedBlockingDeque<MessageDigest>();
+    public static Queue<MessageDigest> digestPool = new ConcurrentLinkedQueue<MessageDigest>();
     private static final int md5CacheSize = Math.max(1000, Math.min(1000000, (int) (Memory.available() / 50000L)));
     private static ARC<String, byte[]> md5Cache = null;
@@ -138,11 +139,8 @@ public class Digest {
         digest.update(keyBytes);
         final byte[] result = digest.digest();
         digest.reset(); // to be prepared for next
-        try {
-            digestPool.put(digest);
-            //System.out.println("Digest Pool size = " + digestPool.size());
-        } catch (final InterruptedException e ) {
-        }
+        digestPool.add(digest);
+        //System.out.println("Digest Pool size = " + digestPool.size());
         // update the cache
         md5Cache.insertIfAbsent(key, result); // prevent expensive MD5 computation and encoding

View File

@@ -95,84 +95,85 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandler {
         buffer.append(document.dc_title().toLowerCase());
         for (String s:document.dc_description()) buffer.append(s.toLowerCase());
         buffer.append(document.dc_subject(' ').toLowerCase());
-        final WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
-        try {
         int score = 0;
         // get phrases
         final TreeMap<String, YMarkTag> phrases = getPhrases(document, 2);
         phrases.putAll(getPhrases(document, 3));
         final Iterator<String> iter = phrases.keySet().iterator();
         while(iter.hasNext()) {
             score = 10;
             final String phrase = iter.next();
             if(phrases.get(phrase).size() > 3 && phrases.get(phrase).size() < 10) {
                 score = phrases.get(phrase).size() * phrase.split(" ").length * 20;
             }
             if(isDigitSpace(phrase)) {
                 score = 10;
             }
             if(phrases.get(phrase).size() > 2 && buffer.indexOf(phrase) > 1) {
                 score = score * 10;
             }
             if (tags.containsKey(phrase)) {
                 score = score * 20;
             }
             topwords.add(new YMarkTag(phrase, score));
             pwords.append(phrase);
             pwords.append(' ');
         }
         // loop through potential tag and rank them
-        while(tokens.hasMoreElements()) {
+        WordTokenizer tokens = new WordTokenizer(new SentenceReader(buffer.toString()), LibraryProvider.dymLib);
+        try {
+            while (tokens.hasMoreElements()) {
                 score = 0;
                 token = tokens.nextElement();
                 // check if the token appears in the text
                 if (words.containsKey(token.toString())) {
                     final Word word = words.get(token.toString());
                     // token appears in text and matches an existing bookmark tag
                     if (tags.containsKey(token.toString())) {
                         score = word.occurrences() * tags.get(token.toString()).size() * 200;
                     }
                     // token appears in text and has more than 3 characters
                     else if (token.length()>3) {
                         score = word.occurrences() * 100;
                     }
                     // if token is already part of a phrase, reduce score
                     if(pwords.toString().indexOf(token.toString())>1) {
                         score = score / 3;
                     }
                     topwords.add(new YMarkTag(token.toString(), score));
                 }
             }
+        } finally {
+            tokens.close();
+            tokens = null;
+        }
         score = 0;
         buffer.setLength(0);
         for(final YMarkTag tag : topwords) {
             if(score < max) {
                 if(tag.size() > 100) {
                     buffer.append(tag.name());
                     buffer.append(YMarkUtil.TAGS_SEPARATOR);
                     score++;
                 }
             } else {
                 break;
             }
         }
         final String clean = YMarkUtil.cleanTagsString(buffer.toString());
         if(clean.equals(YMarkEntry.BOOKMARK.TAGS.deflt())) {
             return MultiProtocolURL.getFileExtension(document.dc_source().getFileName());
         }
         return clean;
-        } finally {
-            tokens.close();
-        }
     }
     private static TreeMap<String, YMarkTag> getPhrases(final Document document, final int size) {
         final TreeMap<String, YMarkTag> phrases = new TreeMap<String, YMarkTag>();
         final StringBuilder phrase = new StringBuilder(128);
-        final WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
+        WordTokenizer tokens = new WordTokenizer(new SentenceReader(document.getTextString()), LibraryProvider.dymLib);
         try {
             StringBuilder token;
             int count = 0;
@@ -206,6 +207,7 @@ public class YMarkAutoTagger implements Runnable, Thread.UncaughtExceptionHandler {
             return phrases;
         } finally {
             tokens.close();
+            tokens = null;
         }
     }

View File

@@ -285,6 +285,7 @@ public final class Condenser {
             }
         } finally {
             wordenum.close();
+            wordenum = null;
         }
     }
@@ -345,7 +346,7 @@ public final class Condenser {
         if (LibraryProvider.autotagging.isEmpty()) doAutotagging = false;
         // read source
-        final WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
+        WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
         try {
             while (wordenum.hasMoreElements()) {
                 word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);
@@ -420,6 +421,7 @@ public final class Condenser {
             }
         } finally {
             wordenum.close();
+            wordenum = null;
         }
         if (pseudostemming) {

View File

@@ -53,25 +53,12 @@ public class SentenceReader implements Iterator<StringBuilder>, Iterable<StringBuilder> {
     }
     private StringBuilder nextElement0() {
         final StringBuilder s = readSentence();
-        //System.out.println(" SENTENCE='" + s + "'"); // DEBUG
         if (s == null) return null;
         return s;
     }
     private StringBuilder readSentence() {
         final StringBuilder s = new StringBuilder(80);
         int nextChar;
         char c, lc = ' '; // starting with ' ' as last character prevents that the result string starts with a ' '
         // find sentence end
-        while (true) {
-            if (this.pos >= this.text.length()) break;
-            nextChar = this.text.charAt(this.pos++);
-            //System.out.print((char) nextChar); // DEBUG
-            if (nextChar < 0) {
-                break;
-            }
+        while (this.pos < this.text.length() && (nextChar = this.text.charAt(this.pos++)) > 0) {
             c = (char) nextChar;
             if (this.pre && (nextChar == 10 || nextChar == 13)) break;
             if (c < ' ') c = ' ';

View File

@@ -39,7 +39,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
     // this enumeration removes all words that contain either wrong characters or are too short
     private StringBuilder buffer = null;
-    private final unsievedWordsEnum e;
+    private unsievedWordsEnum e;
     private final WordCache meaningLib;
     public WordTokenizer(final SentenceReader sr, final WordCache meaningLib) {
@@ -82,13 +82,15 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
     public synchronized void close() {
         this.e.close();
+        this.e = null;
         this.buffer = null;
     }
     private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
         // returns an enumeration of StringBuilder Objects
         private StringBuilder buffer = null;
-        private final SentenceReader sr;
-        private final List<StringBuilder> s;
+        private SentenceReader sr;
+        private List<StringBuilder> s;
         private int sIndex;
         public unsievedWordsEnum(final SentenceReader sr0) {
@@ -152,7 +154,11 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
         }
         public synchronized void close() {
             this.sIndex = 0;
+            this.s.clear();
+            this.s = null;
             this.sr.close();
+            this.sr = null;
         }
     }
@@ -181,7 +187,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
      */
     public static SortedMap<byte[], Integer> hashSentence(final String sentence, final WordCache meaningLib, int maxlength) {
         final SortedMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
-        final WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
+        WordTokenizer words = new WordTokenizer(new SentenceReader(sentence), meaningLib);
         try {
             int pos = 0;
             StringBuilder word;
@@ -202,6 +208,7 @@ public class WordTokenizer implements Enumeration<StringBuilder> {
             return map;
         } finally {
             words.close();
+            words = null;
         }
     }
 }

View File

@@ -902,10 +902,12 @@ public final class Protocol {
         Map<String, String> resultMap = null;
         String key = "";
         final ContentBody keyBody = parts.get("key");
-        if ( keyBody != null ) {
-            final ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
+        if (keyBody != null) {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream(20);
             keyBody.writeTo(baos);
-            key = baos.toString();
+            key = UTF8.String(baos.toByteArray());
+            baos.close();
+            baos = null;
         }
         String filter = event.query.urlMask.pattern().toString();
@@ -1037,67 +1039,70 @@ public final class Protocol {
         RemoteInstance instance = null;
         SolrConnector solrConnector = null;
         SolrDocumentList docList = null;
-        QueryResponse rsp = null;
-        if (localsearch) {
-            // search the local index
-            try {
-                rsp = event.getQuery().getSegment().fulltext().getDefaultConnector().getResponseByParams(solrQuery);
-                docList = rsp.getResults();
-            } catch (final Throwable e) {
-                Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
-                return -1;
-            }
-        } else {
-            try {
-                String address = target == event.peers.mySeed() ? "localhost:" + target.getPort() : target.getPublicAddress();
-                final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 10000);
-                instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
-                solrConnector = new RemoteSolrConnector(instance, "solr");
-                rsp = solrConnector.getResponseByParams(solrQuery);
-                docList = rsp.getResults();
-                solrConnector.close();
-                instance.close();
-                // no need to close this here because that sends a commit to remote solr which is not wanted here
-            } catch (final Throwable e) {
-                Network.log.info("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
-                return -1;
-            }
-        }
-        // evaluate facets
-        Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
-        for (String field: event.query.facetfields) {
-            FacetField facet = rsp.getFacetField(field);
-            ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
-            List<Count> values = facet == null ? null : facet.getValues();
-            if (values == null) continue;
-            for (Count ff: values) {
-                int c = (int) ff.getCount();
-                if (c == 0) continue;
-                result.set(ff.getName(), c);
-            }
-            if (result.size() > 0) facets.put(field, result);
-        }
-        // evaluate snippets
-        Map<String, Map<String, List<String>>> rawsnippets = rsp.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
-        Map<String, String> snippets = new HashMap<String, String>(); // this will be a list of urlhash-snippet entries
-        if (rawsnippets != null) {
-            nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re: rawsnippets.entrySet()) {
-                Map<String, List<String>> rs = re.getValue();
-                for (CollectionSchema field: snippetFields) {
-                    if (rs.containsKey(field.getSolrFieldName())) {
-                        List<String> s = rs.get(field.getSolrFieldName());
-                        if (s.size() > 0) {
-                            snippets.put(re.getKey(), s.get(0));
-                            continue nextsnippet;
-                        }
-                    }
-                }
-                // no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
-            }
-        }
+        Map<String, ReversibleScoreMap<String>> facets = new HashMap<String, ReversibleScoreMap<String>>(event.query.facetfields.size());
+        Map<String, String> snippets = new HashMap<String, String>(); // this will be a list of urlhash-snippet entries
+        {// encapsulate expensive solr QueryResponse object
+            QueryResponse rsp = null;
+            if (localsearch) {
+                // search the local index
+                try {
+                    rsp = event.getQuery().getSegment().fulltext().getDefaultConnector().getResponseByParams(solrQuery);
+                    docList = rsp.getResults();
+                } catch (final Throwable e) {
+                    Network.log.info("SEARCH failed (solr), localpeer (" + e.getMessage() + ")", e);
+                    return -1;
+                }
+            } else {
+                try {
+                    String address = target == event.peers.mySeed() ? "localhost:" + target.getPort() : target.getPublicAddress();
+                    final int solrtimeout = Switchboard.getSwitchboard().getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_TIMEOUT, 6000);
+                    instance = new RemoteInstance("http://" + address, null, "solr", solrtimeout); // this is a 'patch configuration' which considers 'solr' as default collection
+                    solrConnector = new RemoteSolrConnector(instance, "solr");
+                    rsp = solrConnector.getResponseByParams(solrQuery);
+                    docList = rsp.getResults();
+                    solrConnector.close();
+                    instance.close();
+                    // no need to close this here because that sends a commit to remote solr which is not wanted here
+                } catch (final Throwable e) {
+                    Network.log.info("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")");
+                    return -1;
+                }
+            }
+            // evaluate facets
+            for (String field: event.query.facetfields) {
+                FacetField facet = rsp.getFacetField(field);
+                ReversibleScoreMap<String> result = new ClusteredScoreMap<String>(UTF8.insensitiveUTF8Comparator);
+                List<Count> values = facet == null ? null : facet.getValues();
+                if (values == null) continue;
+                for (Count ff: values) {
+                    int c = (int) ff.getCount();
+                    if (c == 0) continue;
+                    result.set(ff.getName(), c);
+                }
+                if (result.size() > 0) facets.put(field, result);
+            }
+            // evaluate snippets
+            Map<String, Map<String, List<String>>> rawsnippets = rsp.getHighlighting(); // a map from the urlhash to a map with key=field and value = list of snippets
+            if (rawsnippets != null) {
+                nextsnippet: for (Map.Entry<String, Map<String, List<String>>> re: rawsnippets.entrySet()) {
+                    Map<String, List<String>> rs = re.getValue();
+                    for (CollectionSchema field: snippetFields) {
+                        if (rs.containsKey(field.getSolrFieldName())) {
+                            List<String> s = rs.get(field.getSolrFieldName());
+                            if (s.size() > 0) {
+                                snippets.put(re.getKey(), s.get(0));
+                                continue nextsnippet;
+                            }
+                        }
+                    }
+                    // no snippet found :( --we don't assign a value here by default; that can be done as an evaluation outside this method
+                }
+            }
+            rsp = null;
+        }
         // evaluate result
         List<URIMetadataNode> container = new ArrayList<URIMetadataNode>();
         if (docList == null || docList.size() == 0) {
@@ -1164,24 +1169,25 @@ public final class Protocol {
             // add the url entry to the word indexes
             container.add(urlEntry);
         }
+        final int dls = docList.size();
+        final int numFound = (int) docList.getNumFound();
+        docList.clear();
+        docList = null;
         if (localsearch) {
-            event.addNodes(container, facets, snippets, true, "localpeer", (int) docList.getNumFound());
+            event.addNodes(container, facets, snippets, true, "localpeer", numFound);
             event.addFinalize();
             event.addExpectedRemoteReferences(-count);
-            Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + docList.getNumFound() + " references");
+            Network.log.info("local search (solr): localpeer sent " + container.size() + "/" + numFound + " references");
         } else {
             for (SolrInputDocument doc: docs) {
                 event.query.getSegment().putDocumentInQueue(doc);
             }
             docs.clear(); docs = null;
-            event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, (int) docList.getNumFound());
+            event.addNodes(container, facets, snippets, false, target.getName() + "/" + target.hash, numFound);
             event.addFinalize();
             event.addExpectedRemoteReferences(-count);
-            Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + docList.getNumFound() + " references");
+            Network.log.info("remote search (solr): peer " + target.getName() + " sent " + (container.size() == 0 ? 0 : container.size()) + "/" + numFound + " references");
         }
-        final int dls = docList.size();
-        docList.clear();
-        docList = null;
         if (solrConnector != null) solrConnector.close();
         if (instance != null) instance.close();
         return dls;

View File

@@ -191,14 +191,17 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
             // we did not find everything in the metadata, look further into the document itself.
             // first acquire the sentences:
-            final String solrText = row.getText();
+            String solrText = row.getText();
             if (solrText != null) {
                 // compute sentences from solr query
-                final SentenceReader sr = new SentenceReader(solrText, pre);
+                SentenceReader sr = new SentenceReader(solrText, pre);
                 sentences = new ArrayList<StringBuilder>();
                 while (sr.hasNext()) {
                     sentences.add(sr.next());
                 }
+                sr.close();
+                sr = null;
+                solrText = null;
             } else if (net.yacy.crawler.data.Cache.has(url.hash())) {
                 // get the sentences from the cache
                 final Request request = loader == null ? null : loader.request(url, true, reindexing);
@@ -213,6 +216,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
                 try {
                     document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                     sentences = document.getSentences(pre);
+                    response = null;
+                    document = null;
                 } catch (final Parser.Failure e) {
                 }
             }
@@ -254,6 +259,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
             init(url.hash(), textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null);
             return;
         }
+        sentences = null; // we don't need this here any more
         // try to load the resource from the cache
         Response response = null;
@@ -311,6 +317,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
             init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage());
             return;
         }
+        sentences = null;
     } //encapsulate potential expensive sentences END
     // compute snippet from media - attention document closed above!