Easier tracking of longest text snippets initializations

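When text snippets statistics are enabled and the FINE log level is enabled
on the TextSnippetStatistics class, each new maximum snippet initialization
time is now logged together with its result status and URL.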
This commit is contained in:
luccioman 2018-05-01 09:58:05 +02:00
parent 3c4344cb12
commit 3b89c232db
4 changed files with 34 additions and 25 deletions

View File

@ -1874,7 +1874,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
final TextSnippet yacysnippet = new TextSnippet(this.loader,
node,
this.query.getQueryGoal().getIncludeHashes(),

View File

@ -154,13 +154,13 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
private ResultClass resultStatus;
public TextSnippet(
final byte[] urlhash,
final DigestURL url,
final String line,
final boolean isMarked,
final ResultClass errorCode,
final String errortext) {
long beginTime = System.currentTimeMillis();
init(urlhash, line, isMarked, errorCode, errortext, beginTime);
init(url, line, isMarked, errorCode, errortext, beginTime);
}
public TextSnippet(
@ -177,7 +177,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
final DigestURL url = row.url();
if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url);
init(url.hash(), null, false, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given", beginTime);
init(url, null, false, ResultClass.ERROR_NO_HASH_GIVEN, "no query hashes given", beginTime);
return;
}
@ -188,7 +188,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
final String snippetLine = snippetsCache.get(wordhashes, urls);
if (snippetLine != null) {
// found the snippet
init(url.hash(), snippetLine, false, source, null, beginTime);
init(url, snippetLine, false, source, null, beginTime);
return;
}
@ -239,7 +239,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
if (sentences == null) {
// not found the snippet
init(url.hash(), null, false, ResultClass.SOURCE_METADATA, null, beginTime);
init(url, null, false, ResultClass.SOURCE_METADATA, null, beginTime);
return;
}
@ -249,7 +249,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (final UnsupportedOperationException e) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
return;
}
}
@ -293,7 +293,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
}
}
}
init(url.hash(), textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null, beginTime);
init(url, textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null, beginTime);
return;
}
sentences = null; // we don't need this here any more
@ -309,12 +309,12 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
init(url.hash(), null, false, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry", beginTime);
init(url, null, false, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry", beginTime);
return;
}
// if it is still not available, report an error
init(url.hash(), null, false, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry", beginTime);
init(url, null, false, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry", beginTime);
return;
}
@ -329,11 +329,11 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
try {
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_FAILED, e.getMessage(), beginTime); // cannot be parsed
init(url, null, false, ResultClass.ERROR_PARSER_FAILED, e.getMessage(), beginTime); // cannot be parsed
return;
}
if (document == null) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_FAILED, "parser error/failed", beginTime); // cannot be parsed
init(url, null, false, ResultClass.ERROR_PARSER_FAILED, "parser error/failed", beginTime); // cannot be parsed
return;
}
@ -342,7 +342,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
document.close();
if (sentences == null) {
init(url.hash(), null, false, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences", beginTime);
init(url, null, false, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences", beginTime);
return;
}
@ -351,20 +351,20 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (final UnsupportedOperationException e) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
return;
}
sentences = null;
if (textline == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found", beginTime);
init(url, null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found", beginTime);
return;
}
if (textline.length() > snippetMaxLength) textline = textline.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, textline);
init(url.hash(), textline, false, source, null, beginTime);
init(url, textline, false, source, null, beginTime);
}
/**
@ -378,18 +378,18 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
* @param beginTime the time in milliseconds when TextSnippet creation started
*/
private void init(
final byte[] urlhash,
final DigestURL url,
final String line,
final boolean isMarked,
final ResultClass errorCode,
final String errortext,
final long beginTime) {
this.urlhash = urlhash;
this.urlhash = url.hash();
this.line = line;
this.isMarked = isMarked;
this.resultStatus = errorCode;
this.error = errortext;
TextSnippet.statistics.addTextSnippetStatistics(System.currentTimeMillis() - beginTime, this.resultStatus);
TextSnippet.statistics.addTextSnippetStatistics(url, System.currentTimeMillis() - beginTime, this.resultStatus);
}
/**

View File

@ -26,6 +26,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.LongBinaryOperator;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.snippet.TextSnippet.ResultClass;
@ -34,6 +36,9 @@ import net.yacy.search.snippet.TextSnippet.ResultClass;
*/
public class TextSnippetStatistics {
/** Logs handler */
private static final ConcurrentLog logger = new ConcurrentLog(TextSnippetStatistics.class.getName());
/** Total number of TextSnippet instances created since last JVM start */
private AtomicLong totalSnippets = new AtomicLong(0);
@ -141,17 +146,21 @@ public class TextSnippetStatistics {
* @param resultStatus
* the snippet result status.
*/
public void addTextSnippetStatistics(final long initTime, final ResultClass resultStatus) {
public void addTextSnippetStatistics(final DigestURL url, final long initTime, final ResultClass resultStatus) {
if (this.enabled.get() && resultStatus != null) {
this.totalSnippets.incrementAndGet();
this.totalInitTime.addAndGet(initTime);
this.maxInitTime.accumulateAndGet(initTime, new LongBinaryOperator() {
if(initTime == this.maxInitTime.accumulateAndGet(initTime, new LongBinaryOperator() {
@Override
public long applyAsLong(long currentValue, long updateValue) {
return currentValue < updateValue ? updateValue : currentValue;
}
});
})) {
if(logger.isFine()) {
logger.fine("New max snippet init time : status " + resultStatus + " in " + initTime + " ms for URL " + url);
}
}
if (resultStatus != null) {
switch (resultStatus) {

View File

@ -133,7 +133,7 @@ public class TextSnippetTest {
// test with raw line (no marking added by YaCy)
TextSnippet ts = new TextSnippet(
url.hash(),
url,
rawtestline,
true, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");
@ -144,7 +144,7 @@ public class TextSnippetTest {
// test with marking of query word
ts = new TextSnippet(
url.hash(),
url,
rawtestline,
false, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");
@ -157,7 +157,7 @@ public class TextSnippetTest {
// test text with some numbers (english/german format)
rawtestline = "Test Version 1.83 calculates pi to 3,14 always";
ts = new TextSnippet(
url.hash(),
url,
rawtestline,
false, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");