// SnippetProcess.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package net.yacy.search.query;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.solr.SolrConnector;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.Element;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Condenser;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.index.SolrField;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

import de.anomic.crawler.Cache;
import de.anomic.data.WorkTables;

/**
 * Takes the url results from the RWIProcess, enriches them with snippets
 * using a pool of worker threads, applies a post-ranking and offers the
 * enriched results ordered by their final ranking.
 */
public class SnippetProcess {

    private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2);

    // input values
    final RWIProcess rankingProcess; // ordered search results, grows dynamically as all the query threads enrich this container
    QueryParams query;
    private final SeedDB peers;
    private final WorkTables workTables;

    // result values
    protected final LoaderDispatcher loader;
    protected Worker[] workerThreads;
    protected final WeakPriorityBlockingQueue<ResultEntry> result;
    protected final WeakPriorityBlockingQueue<MediaSnippet> images; // container to sort images by size
    protected final HandleSet snippetFetchWordHashes; // a set of word hashes that are used to match with the snippets
    long urlRetrievalAllTime;
    long snippetComputationAllTime;
    int taketimeout;
    private final boolean deleteIfSnippetFail, remote;
    private boolean cleanupState;
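
    /**
     * Create the snippet fetch stage of a search event and immediately deploy
     * the first batch of worker threads. The snippet match words are the
     * query hashes minus the stopwords that had not been used for the search.
     */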
    public SnippetProcess(
            final LoaderDispatcher loader,
            final RWIProcess rankedCache,
            final QueryParams query,
            final SeedDB peers,
            final WorkTables workTables,
            final int taketimeout,
            final boolean deleteIfSnippetFail,
            final boolean remote) {
        assert query != null;
        this.loader = loader;
        this.rankingProcess = rankedCache;
        this.query = query;
        this.peers = peers;
        this.workTables = workTables;
        this.taketimeout = taketimeout;
        this.deleteIfSnippetFail = deleteIfSnippetFail;
        this.remote = remote;
        this.cleanupState = false;
        this.urlRetrievalAllTime = 0;
        this.snippetComputationAllTime = 0;
        this.result = new WeakPriorityBlockingQueue<ResultEntry>(Math.max(1000, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
        this.images = new WeakPriorityBlockingQueue<MediaSnippet>(Math.max(1000, 10 * query.itemsPerPage()), true);

        // snippets do not need to match with the complete query hashes,
        // only with the query minus the stopwords which had not been used for the search
        HandleSet filtered;
        try {
            filtered = HandleSet.joinConstructive(query.queryHashes, Switchboard.stopwordHashes);
        } catch (final RowSpaceExceededException e) {
            Log.logException(e);
            filtered = new HandleSet(query.queryHashes.row().primaryKeyLength, query.queryHashes.comparator(), 0);
        }
        this.snippetFetchWordHashes = query.queryHashes.clone();
        if (filtered != null && !filtered.isEmpty()) {
            this.snippetFetchWordHashes.excludeDestructive(Switchboard.stopwordHashes);
        }

        // start worker threads to fetch urls and snippets
        this.workerThreads = null;
        deployWorker(Math.min(SNIPPET_WORKER_THREADS, query.itemsPerPage), query.neededResults());
        EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(query.id(true), SearchEvent.Type.SNIPPETFETCH_START, ((this.workerThreads == null) ? "no" : this.workerThreads.length) + " online snippet fetch threads started", 0, 0), false);
    }

    public void setCleanupState() {
        this.cleanupState = true;
    }

    public long getURLRetrievalTime() {
        return this.urlRetrievalAllTime;
    }

    public long getSnippetComputationTime() {
        return this.snippetComputationAllTime;
    }
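
    /**
     * Return the result at the given position, waiting until the snippet
     * workers have produced it or the timeout is reached. For the first ten
     * items of a remote search, short sleeps give remote peers time to
     * deliver so that results are ordered before the first page is rendered.
     *
     * @param item    position of the wanted result, starting at 0
     * @param timeout maximum waiting time in milliseconds
     * @return the result entry, or null if none became available in time
     */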
    public ResultEntry oneResult(final int item, final long timeout) {
        // check if we already retrieved this item
        // (happens if a search page is accessed a second time)
        final long finishTime = System.currentTimeMillis() + timeout;
        EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "started, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);

        // we must wait some time until the first result page is full to get enough elements for ranking
        final long waittimeout = System.currentTimeMillis() + 300;
        if (this.remote && item < 10 && !this.rankingProcess.feedingIsFinished()) {
            // the first 10 results have a very special timing to get most of the remote results ordered
            // before they are presented on the first lines .. yes sleeps seem to be bad. but how shall we predict how long other
            // peers will take until they respond?
            final long sleep = item == 0 ? 600 : (10 - item) * 12; // the first result takes the longest time
            try { Thread.sleep(sleep); } catch (final InterruptedException e1) { Log.logException(e1); }
        }
        int thisRankingQueueSize, lastRankingQueueSize = 0;
        if (item < 10) {
            while (
                ((thisRankingQueueSize = this.rankingProcess.sizeQueue()) > 0 || !this.rankingProcess.feedingIsFinished()) &&
                (thisRankingQueueSize > lastRankingQueueSize || this.result.sizeAvailable() < item + 1) &&
                System.currentTimeMillis() < waittimeout &&
                anyWorkerAlive()
               ) {
                // wait a little time to get first results in the search
                lastRankingQueueSize = thisRankingQueueSize;
                try { Thread.sleep(20); } catch (final InterruptedException e1) {}
            }
        }

        if (this.result.sizeAvailable() > item) {
            // we have the wanted result already in the result array .. return that
            final ResultEntry re = this.result.element(item).getElement();
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "prefetched, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
            return re;
        }

        // finally wait until enough results are produced by the snippet fetch process
        WeakPriorityBlockingQueue.Element<ResultEntry> entry = null;
        while (System.currentTimeMillis() < finishTime) {
            if (!anyWorkerAlive() && !this.rankingProcess.isAlive() && this.result.sizeAvailable() + this.rankingProcess.sizeQueue() <= item && this.rankingProcess.feedingIsFinished()) {
                break; // the fail case
            }

            // deploy worker to get more results
            if (!anyWorkerAlive()) {
                final int neededInclPrefetch = this.query.neededResults() + ((MemoryControl.available() > 100 * 1024 * 1024 && SNIPPET_WORKER_THREADS >= 8) ? this.query.itemsPerPage : 0);
                deployWorker(Math.min(SNIPPET_WORKER_THREADS, this.query.itemsPerPage), neededInclPrefetch);
            }

            try { entry = this.result.element(item, 50); } catch (final InterruptedException e) { break; }
            if (entry != null) {
                break;
            }
        }

        // finally, if there is something, return the result
        if (entry == null) {
            EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "not found, item = " + item + ", available = " + this.result.sizeAvailable(), 0, 0), false);
            return null;
        }
        final ResultEntry re = entry.getElement();
        EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEvent.Type.ONERESULT, "retrieved, item = " + item + ", available = " + this.result.sizeAvailable() + ": " + re.urlstring(), 0, 0), false);
        if (item == this.query.offset + this.query.itemsPerPage - 1) {
            stopAllWorker(); // we don't need more
        }
        return re;
    }

    private int resultCounter = 0;

    public ResultEntry nextResult() {
        final ResultEntry re = oneResult(this.resultCounter, Math.max(3000, this.query.timeout - System.currentTimeMillis()));
        this.resultCounter++;
        return re;
    }
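
    /**
     * Fetch the image snippet at the given position. The image cache is
     * refilled from fresh results whenever fewer than ten images beyond the
     * requested position are available.
     *
     * @param item position of the wanted image, starting at 0
     * @return the media snippet, or null if no image exists at that position
     */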
    public MediaSnippet oneImage(final int item) {
        // always look for another object if there are too few
        if (this.images.sizeAvailable() <= item + 10) {
            fillImagesCache();
        }

        // check if we already retrieved the item
        if (this.images.sizeDrained() > item) {
            return this.images.element(item).getElement();
        }

        // look again if there are not enough for presentation
        while (this.images.sizeAvailable() <= item) {
            if (fillImagesCache() == 0) {
                break;
            }
        }
        if (this.images.sizeAvailable() <= item) {
            return null;
        }

        // now take the specific item from the image stack
        return this.images.element(item).getElement();
    }

    private int fillImagesCache() {
        final ResultEntry result = nextResult();
        int c = 0;
        if (result == null) {
            return c;
        }
        // iterate over all images in the result
        final List<MediaSnippet> imagemedia = result.mediaSnippets();
        if (imagemedia != null) {
            ResponseHeader header;
            feedloop: for (final MediaSnippet ms : imagemedia) {
                // check cache to see if the mime type of the image url is correct
                header = Cache.getResponseHeader(ms.href.hash());
                if (header != null) {
                    // this does not work for all urls since some of them may not be in the cache
                    if (header.mime().startsWith("text") || header.mime().startsWith("application")) {
                        continue feedloop;
                    }
                }
                this.images.put(new ReverseElement<MediaSnippet>(ms, ms.ranking)); // remove smallest in case of overflow
                c++;
            }
        }
        return c;
    }

    public ArrayList<WeakPriorityBlockingQueue.Element<ResultEntry>> completeResults(final long waitingtime) {
        final long timeout = System.currentTimeMillis() + waitingtime;
        while (this.result.sizeAvailable() < this.query.neededResults() &&
               anyWorkerAlive() &&
               System.currentTimeMillis() < timeout) {
            try { Thread.sleep(10); } catch (final InterruptedException e) {}
        }
        return this.result.list(Math.min(this.query.neededResults(), this.result.sizeAvailable()));
    }
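
    /**
     * Compute a post-ranking bonus for a result entry. Rewards media-rich
     * pages, a high citation count, matches of the query 'prefer' pattern in
     * url and title, and occurrences of top words and query words in the url
     * and title components; every contribution is scaled with the shift
     * coefficients of the query ranking profile.
     */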
    public long postRanking(final ResultEntry rentry, final ScoreMap<String> topwords) {

        long r = 0;

        // for media search: prefer pages with many links
        r += rentry.limage() << this.query.ranking.coeff_cathasimage;
        r += rentry.laudio() << this.query.ranking.coeff_cathasaudio;
        r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo;
        r += rentry.lapp()   << this.query.ranking.coeff_cathasapp;

        // apply citation count
        //System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
        r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;

        // prefer hit with 'prefer' pattern
        if (this.query.prefer.matcher(rentry.url().toNormalform(true, true)).matches()) {
            r += 256 << this.query.ranking.coeff_prefer;
        }
        if (this.query.prefer.matcher(rentry.title()).matches()) {
            r += 256 << this.query.ranking.coeff_prefer;
        }

        // apply 'common-sense' heuristic using references
        final String urlstring = rentry.url().toNormalform(true, true);
        final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
        final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
        int tc;
        for (final String urlcomp : urlcomps) {
            tc = topwords.get(urlcomp);
            if (tc > 0) {
                r += Math.max(1, tc) << this.query.ranking.coeff_urlcompintoplist;
            }
        }
        for (final String descrcomp : descrcomps) {
            tc = topwords.get(descrcomp);
            if (tc > 0) {
                r += Math.max(1, tc) << this.query.ranking.coeff_descrcompintoplist;
            }
        }

        // apply query-in-result matching
        final HandleSet urlcomph = Word.words2hashesHandles(urlcomps);
        final HandleSet descrcomph = Word.words2hashesHandles(descrcomps);
        final Iterator<byte[]> shi = this.query.queryHashes.iterator();
        byte[] queryhash;
        while (shi.hasNext()) {
            queryhash = shi.next();
            if (urlcomph.has(queryhash)) {
                r += 256 << this.query.ranking.coeff_appurl;
            }
            if (descrcomph.has(queryhash)) {
                r += 256 << this.query.ranking.coeff_app_dc_title;
            }
        }

        return r;
    }
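
    /**
     * Start up to deployCount snippet workers, or re-deploy dead slots in an
     * existing worker array. Does nothing while in cleanup state, when the
     * ranking process is finished and drained, or when enough results are
     * already available.
     */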
    public void deployWorker(int deployCount, final int neededResults) {
        if (this.cleanupState ||
            (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0) ||
            this.result.sizeAvailable() >= neededResults) {
            return;
        }
        Worker worker;
        if (this.workerThreads == null) {
            this.workerThreads = new Worker[deployCount];
            synchronized(this.workerThreads) {
                try {
                    for (int i = 0; i < this.workerThreads.length; i++) {
                        if (this.result.sizeAvailable() >= neededResults ||
                            (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) {
                            break;
                        }
                        worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults);
                        worker.start();
                        this.workerThreads[i] = worker;
                        if (this.rankingProcess.expectMoreRemoteReferences()) {
                            final long wait = this.rankingProcess.waitTimeRecommendation();
                            if (wait > 0) {
                                try { Thread.sleep(wait); } catch (final InterruptedException e) {}
                            }
                        }
                    }
                } catch (final OutOfMemoryError e) {}
            }
        } else {
            // there are still worker threads running, but some may be dead;
            // if we find dead workers, reanimate them
            synchronized(this.workerThreads) {
                for (int i = 0; i < this.workerThreads.length; i++) {
                    if (deployCount <= 0 ||
                        this.result.sizeAvailable() >= neededResults ||
                        (this.rankingProcess.feedingIsFinished() && this.rankingProcess.sizeQueue() == 0)) {
                        break;
                    }
                    if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
                        worker = new Worker(i, this.query.maxtime, this.query.snippetCacheStrategy, this.query.snippetMatcher, neededResults);
                        worker.start();
                        this.workerThreads[i] = worker;
                        deployCount--;
                    }
                    if (this.rankingProcess.expectMoreRemoteReferences()) {
                        final long wait = this.rankingProcess.waitTimeRecommendation();
                        if (wait > 0) {
                            try { Thread.sleep(wait); } catch (final InterruptedException e) {}
                        }
                    }
                }
            }
        }
    }

    public void stopAllWorker() {
        if (this.workerThreads == null) {
            return; // deployWorker may have returned early without creating the array
        }
        synchronized(this.workerThreads) {
            for (int i = 0; i < this.workerThreads.length; i++) {
                if (this.workerThreads[i] == null || !this.workerThreads[i].isAlive()) {
                    continue;
                }
                this.workerThreads[i].pleaseStop();
                this.workerThreads[i].interrupt();
            }
        }
    }

    private boolean anyWorkerAlive() {
        if (this.workerThreads == null || this.workerThreads.length == 0) {
            return false;
        }
        synchronized(this.workerThreads) {
            for (final Worker workerThread : this.workerThreads) {
                if ((workerThread != null) &&
                    (workerThread.isAlive()) &&
                    (workerThread.busytime() < 10000)) {
                    return true;
                }
            }
        }
        return false;
    }

    protected class Worker extends Thread {

        private final long timeout; // the date until this thread should try to work
        private long lastLifeSign; // when the last time the run()-loop was executed
        private final CacheStrategy cacheStrategy;
        private final int neededResults;
        private final Pattern snippetPattern;
        private boolean shallrun;
        private final SolrConnector solr;

        public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) {
            this.cacheStrategy = cacheStrategy;
            this.lastLifeSign = System.currentTimeMillis();
            this.snippetPattern = snippetPattern;
            this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
            this.neededResults = neededResults;
            this.shallrun = true;
            this.solr = SnippetProcess.this.rankingProcess.getQuery().getSegment().getSolr();
        }
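
        /**
         * Worker main loop: takes the next url from the ranking process,
         * optionally loads the corresponding Solr document, computes a
         * snippet and puts the post-ranked result into the result queue.
         * Terminates on timeout, memory shortage, a stop request, or when
         * the feeding is finished and the take queue is empty.
         */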
        @Override
        public void run() {

            // start fetching urls and snippets
            URIMetadataRow page;
            ResultEntry resultEntry;
            //final int fetchAhead = snippetMode == 0 ? 0 : 10;
            final boolean nav_topics = SnippetProcess.this.query.navigators.equals("all") || SnippetProcess.this.query.navigators.indexOf("topics", 0) >= 0;
            try {
                //System.out.println("DEPLOYED WORKER " + id + " FOR " + this.neededResults + " RESULTS, timeout = " + (this.timeout - System.currentTimeMillis()));
                int loops = 0;
                while (this.shallrun && System.currentTimeMillis() < this.timeout) {
                    this.lastLifeSign = System.currentTimeMillis();

                    if (MemoryControl.shortStatus()) {
                        break;
                    }

                    // check if we have enough; we stop only if we can fetch online; otherwise it is better to run this to get better navigation
                    if ((this.cacheStrategy == null || this.cacheStrategy.isAllowedToFetchOnline()) && SnippetProcess.this.result.sizeAvailable() >= this.neededResults) {
                        break;
                    }

                    // check if we can succeed if we try to take another url
                    if (SnippetProcess.this.rankingProcess.feedingIsFinished() && SnippetProcess.this.rankingProcess.sizeQueue() == 0) {
                        break;
                    }

                    // get next entry
                    page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(500, Math.max(20, this.timeout - System.currentTimeMillis())));
                    //if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
                    if (page == null) {
                        break; // no more available
                    }

                    this.setName(page.url().toNormalform(true, false)); // to support debugging
                    if (SnippetProcess.this.query.filterfailurls && SnippetProcess.this.workTables.failURLsContains(page.hash())) {
                        continue;
                    }

                    // in case that we have an attached solr, we load also the solr document
                    String solrContent = null;
                    if (this.solr != null) {
                        SolrDocument sd = null;
                        final SolrDocumentList sdl = this.solr.get(SolrField.id.getSolrFieldName() + ":" + ASCII.String(page.hash()), 0, 1);
                        if (sdl.size() > 0) {
                            sd = sdl.get(0);
                        }
                        if (sd != null) {
                            solrContent = Switchboard.getSwitchboard().solrScheme.solrGetText(sd);
                        }
                    }

                    loops++;
                    resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
                    if (resultEntry == null) {
                        continue; // the entry had some problems, cannot be used
                        //final String rawLine = resultEntry.textSnippet() == null ? null : resultEntry.textSnippet().getLineRaw();
                        //System.out.println("***SNIPPET*** raw='" + rawLine + "', pattern='" + this.snippetPattern.toString() + "'");
                        //if (rawLine != null && !this.snippetPattern.matcher(rawLine).matches()) continue;
                    }
                    //if (result.contains(resultEntry)) continue;

                    SnippetProcess.this.urlRetrievalAllTime += resultEntry.dbRetrievalTime;
                    SnippetProcess.this.snippetComputationAllTime += resultEntry.snippetComputationTime;

                    // place the result into the result vector and apply post-ranking
                    long ranking = Long.valueOf(SnippetProcess.this.rankingProcess.getOrder().cardinal(resultEntry.word()));
                    ranking += postRanking(resultEntry, SnippetProcess.this.rankingProcess.getTopicNavigator(10));
                    resultEntry.ranking = ranking;
                    SnippetProcess.this.result.put(new ReverseElement<ResultEntry>(resultEntry, ranking)); // remove smallest in case of overflow
                    if (nav_topics) {
                        SnippetProcess.this.rankingProcess.addTopics(resultEntry);
                    }
                }
                if (System.currentTimeMillis() >= this.timeout) {
                    Log.logWarning("SnippetProcess", "worker ended with timeout");
                }
                //System.out.println("FINISHED WORKER " + id + " FOR " + this.neededResults + " RESULTS, loops = " + loops);
            } catch (final Exception e) {
                Log.logException(e);
            }
        }

        public void pleaseStop() {
            this.shallrun = false;
        }

        /**
         * calculate the time since the worker has had the latest activity
         * @return time in milliseconds elapsed since the latest activity
         */
        public long busytime() {
            return System.currentTimeMillis() - this.lastLifeSign;
        }
    }
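
    /**
     * Wrap a url entry into a ResultEntry, attaching a text snippet where the
     * cache strategy allows it: with a null strategy no document is loaded at
     * all; offline strategies use only the local cache; online strategies may
     * load the document from the web. Returns null if a required snippet
     * could not be produced and the result was sorted out.
     */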
    protected ResultEntry fetchSnippet(final URIMetadataRow page, final String solrText, final CacheStrategy cacheStrategy) {
        // Snippet fetching can have 3 modes:
        // 0 - do not fetch snippets
        // 1 - fetch snippets offline only
        // 2 - online snippet fetch

        // load only urls if there was not yet a root url of that hash
        // find the url entry

        long startTime = System.currentTimeMillis();
        if (page == null) {
            return null;
        }
        final long dbRetrievalTime = System.currentTimeMillis() - startTime;

        if (cacheStrategy == null) {
            final TextSnippet snippet = new TextSnippet(
                    null,
                    solrText,
                    page,
                    this.snippetFetchWordHashes,
                    null,
                    ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
                    220,
                    Integer.MAX_VALUE,
                    !this.query.isLocal());
            return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, 0); // result without snippet
        }

        // load snippet
        if (page.url().getContentDomain() == Classification.ContentDomain.TEXT || page.url().getContentDomain() == Classification.ContentDomain.ALL) {
            // attach text snippet
            startTime = System.currentTimeMillis();
            final TextSnippet snippet = new TextSnippet(
                    this.loader,
                    solrText,
                    page,
                    this.snippetFetchWordHashes,
                    cacheStrategy,
                    ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
                    180,
                    Integer.MAX_VALUE,
                    !this.query.isLocal());
            final long snippetComputationTime = System.currentTimeMillis() - startTime;
"snippet found" : ("no snippet found (" + snippet.getError() + ")"))); if (!snippet.getErrorCode().fail()) { // we loaded the file and found the snippet return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, snippetComputationTime); // result with snippet attached } else if (cacheStrategy.mustBeOffline()) { // we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result // this may happen during a remote search, because snippet loading is omitted to retrieve results faster return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, snippetComputationTime); // result without snippet } else { // problems with snippet fetch if (this.snippetFetchWordHashes.has(Segment.catchallHash)) { // we accept that because the word cannot be on the page return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); } final String reason = "no text snippet; errorCode = " + snippet.getErrorCode(); if (this.deleteIfSnippetFail) { this.workTables.failURLsRegisterMissingWord(this.query.getSegment().termIndex(), page.url(), this.query.queryHashes, reason); } Log.logInfo("SEARCH", "sorted out url " + page.url().toNormalform(true, false) + " during search: " + reason); return null; } } else { return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, dbRetrievalTime, 0); // result without snippet } // finished, no more actions possible here } /** * delete a specific entry from the search results * this is used if the user clicks on a '-' sign beside the search result * @param urlhash * @return true if an entry was deleted, false otherwise */ public boolean delete(final String urlhash) { final Iterator> i = this.result.iterator(); Element entry; while (i.hasNext()) { entry = i.next(); if (urlhash.equals(ASCII.String(entry.getElement().url().hash()))) { i.remove(); return true; } } return false; } }