mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added test case for TextSnippet,
removed obsolete/unused parameter and reference to MediaSnippet
This commit is contained in:
parent
cb2c17d236
commit
431a5f9c4e
|
@ -1246,7 +1246,7 @@ public final class SearchEvent {
|
||||||
if (solrsnippet != null && solrsnippet.size() > 0) {
|
if (solrsnippet != null && solrsnippet.size() > 0) {
|
||||||
OpensearchResponseWriter.removeSubsumedTitle(solrsnippet, node.dc_title());
|
OpensearchResponseWriter.removeSubsumedTitle(solrsnippet, node.dc_title());
|
||||||
final TextSnippet snippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippet), true, ResultClass.SOURCE_CACHE, "");
|
final TextSnippet snippet = new TextSnippet(node.hash(), OpensearchResponseWriter.getLargestSnippet(solrsnippet), true, ResultClass.SOURCE_CACHE, "");
|
||||||
ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, null, 0);
|
ResultEntry re = new ResultEntry(node, this.query.getSegment(), this.peers, snippet, 0);
|
||||||
addResult(re);
|
addResult(re);
|
||||||
success = true;
|
success = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1375,7 +1375,7 @@ public final class SearchEvent {
|
||||||
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
|
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
|
||||||
SearchEvent.SNIPPET_MAX_LENGTH,
|
SearchEvent.SNIPPET_MAX_LENGTH,
|
||||||
!this.query.isLocal());
|
!this.query.isLocal());
|
||||||
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, 0); // result without snippet
|
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, 0); // result without snippet
|
||||||
}
|
}
|
||||||
|
|
||||||
// load snippet
|
// load snippet
|
||||||
|
@ -1396,16 +1396,16 @@ public final class SearchEvent {
|
||||||
|
|
||||||
if (!snippet.getErrorCode().fail()) {
|
if (!snippet.getErrorCode().fail()) {
|
||||||
// we loaded the file and found the snippet
|
// we loaded the file and found the snippet
|
||||||
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, snippetComputationTime); // result with snippet attached
|
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, snippetComputationTime); // result with snippet attached
|
||||||
} else if (cacheStrategy.mustBeOffline()) {
|
} else if (cacheStrategy.mustBeOffline()) {
|
||||||
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
|
// we did not demand online loading, therefore a failure does not mean that the missing snippet causes a rejection of this result
|
||||||
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster
|
// this may happen during a remote search, because snippet loading is omitted to retrieve results faster
|
||||||
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, snippetComputationTime); // result without snippet
|
return new ResultEntry(page, this.query.getSegment(), this.peers, null, snippetComputationTime); // result without snippet
|
||||||
} else {
|
} else {
|
||||||
// problems with snippet fetch
|
// problems with snippet fetch
|
||||||
if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
|
if (this.snippetFetchWordHashes.has(Segment.catchallHash)) {
|
||||||
// we accept that because the word cannot be on the page
|
// we accept that because the word cannot be on the page
|
||||||
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0);
|
return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0);
|
||||||
}
|
}
|
||||||
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
|
final String reason = "no text snippet; errorCode = " + snippet.getErrorCode();
|
||||||
if (this.deleteIfSnippetFail) {
|
if (this.deleteIfSnippetFail) {
|
||||||
|
@ -1415,7 +1415,7 @@ public final class SearchEvent {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new ResultEntry(page, this.query.getSegment(), this.peers, null, null, 0); // result without snippet
|
return new ResultEntry(page, this.query.getSegment(), this.peers, null, 0); // result without snippet
|
||||||
}
|
}
|
||||||
|
|
||||||
public ResultEntry oneResult(final int item, final long timeout) {
|
public ResultEntry oneResult(final int item, final long timeout) {
|
||||||
|
|
|
@ -57,7 +57,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
|
||||||
private String alternative_urlstring;
|
private String alternative_urlstring;
|
||||||
private String alternative_urlname;
|
private String alternative_urlname;
|
||||||
private final TextSnippet textSnippet;
|
private final TextSnippet textSnippet;
|
||||||
private final List<MediaSnippet> mediaSnippets;
|
|
||||||
private final Segment indexSegment;
|
private final Segment indexSegment;
|
||||||
|
|
||||||
// statistic objects
|
// statistic objects
|
||||||
|
@ -67,7 +66,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
|
||||||
final Segment indexSegment,
|
final Segment indexSegment,
|
||||||
SeedDB peers,
|
SeedDB peers,
|
||||||
final TextSnippet textSnippet,
|
final TextSnippet textSnippet,
|
||||||
final List<MediaSnippet> mediaSnippets,
|
|
||||||
final long snippetComputationTime) {
|
final long snippetComputationTime) {
|
||||||
this.urlentry = urlentry;
|
this.urlentry = urlentry;
|
||||||
this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
|
this.urlentry.setField(CollectionSchema.text_t.getSolrFieldName(), ""); // clear the text field which eats up most of the space; it was used for snippet computation which is in a separate field here
|
||||||
|
@ -75,7 +73,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
|
||||||
this.alternative_urlstring = null;
|
this.alternative_urlstring = null;
|
||||||
this.alternative_urlname = null;
|
this.alternative_urlname = null;
|
||||||
this.textSnippet = textSnippet;
|
this.textSnippet = textSnippet;
|
||||||
this.mediaSnippets = mediaSnippets;
|
|
||||||
this.snippetComputationTime = snippetComputationTime;
|
this.snippetComputationTime = snippetComputationTime;
|
||||||
final String host = urlentry.url().getHost();
|
final String host = urlentry.url().getHost();
|
||||||
if (host != null && host.endsWith(".yacyh")) {
|
if (host != null && host.endsWith(".yacyh")) {
|
||||||
|
@ -163,9 +160,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
|
||||||
public TextSnippet textSnippet() {
|
public TextSnippet textSnippet() {
|
||||||
return this.textSnippet;
|
return this.textSnippet;
|
||||||
}
|
}
|
||||||
public List<MediaSnippet> mediaSnippets() {
|
|
||||||
return this.mediaSnippets;
|
|
||||||
}
|
|
||||||
public Date modified() {
|
public Date modified() {
|
||||||
return this.urlentry.moddate();
|
return this.urlentry.moddate();
|
||||||
}
|
}
|
||||||
|
@ -211,9 +205,6 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
|
||||||
public boolean hasTextSnippet() {
|
public boolean hasTextSnippet() {
|
||||||
return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
|
return (this.textSnippet != null) && (!this.textSnippet.getErrorCode().fail());
|
||||||
}
|
}
|
||||||
public boolean hasMediaSnippets() {
|
|
||||||
return (this.mediaSnippets != null) && (!this.mediaSnippets.isEmpty());
|
|
||||||
}
|
|
||||||
public String resource() {
|
public String resource() {
|
||||||
// generate transport resource
|
// generate transport resource
|
||||||
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
|
if ((this.textSnippet == null) || (!this.textSnippet.exists())) {
|
||||||
|
|
|
@ -320,30 +320,14 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
|
||||||
sentences = null;
|
sentences = null;
|
||||||
} //encapsulate potential expensive sentences END
|
} //encapsulate potential expensive sentences END
|
||||||
|
|
||||||
// compute snippet from media - attention document closed above!
|
if (textline == null || !remainingHashes.isEmpty()) {
|
||||||
//String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
|
|
||||||
//String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
|
|
||||||
//String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
|
|
||||||
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
|
|
||||||
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
|
|
||||||
|
|
||||||
snippetLine = "";
|
|
||||||
//if (audioline != null) line += (line.isEmpty()) ? audioline : "<br />" + audioline;
|
|
||||||
//if (videoline != null) line += (line.isEmpty()) ? videoline : "<br />" + videoline;
|
|
||||||
//if (appline != null) line += (line.isEmpty()) ? appline : "<br />" + appline;
|
|
||||||
//if (hrefline != null) line += (line.isEmpty()) ? hrefline : "<br />" + hrefline;
|
|
||||||
//if (textline != null) snippetLine += (snippetLine.isEmpty()) ? textline : "<br />" + textline;
|
|
||||||
|
|
||||||
if (snippetLine == null || !remainingHashes.isEmpty()) {
|
|
||||||
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
|
init(url.hash(), null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
|
if (snippetLine.length() > snippetMaxLength) snippetLine = snippetLine.substring(0, snippetMaxLength);
|
||||||
|
|
||||||
// finally store this snippet in our own cache
|
// finally store this snippet in our own cache
|
||||||
snippetsCache.put(wordhashes, urls, snippetLine);
|
snippetsCache.put(wordhashes, urls, textline);
|
||||||
|
|
||||||
// document.close();
|
|
||||||
init(url.hash(), snippetLine, false, source, null);
|
init(url.hash(), snippetLine, false, source, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
120
test/net/yacy/search/snippet/TextSnippetTest.java
Normal file
120
test/net/yacy/search/snippet/TextSnippetTest.java
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
|
||||||
|
package net.yacy.search.snippet;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.encoding.ASCII;
|
||||||
|
import net.yacy.cora.document.id.DigestURL;
|
||||||
|
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||||
|
import net.yacy.cora.storage.HandleSet;
|
||||||
|
import net.yacy.kelondro.data.meta.URIMetadataNode;
|
||||||
|
import net.yacy.search.query.QueryGoal;
|
||||||
|
import net.yacy.search.schema.CollectionSchema;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
|
public class TextSnippetTest {
|
||||||
|
|
||||||
|
// declare some required parameters
|
||||||
|
final CacheStrategy cacheStrategy = CacheStrategy.CACHEONLY;
|
||||||
|
final boolean pre = true;
|
||||||
|
final int snippetMaxLength = 220;
|
||||||
|
final boolean reindexing = false;
|
||||||
|
|
||||||
|
SolrDocument doc;
|
||||||
|
|
||||||
|
public TextSnippetTest() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
|
||||||
|
// prepare an empty test document
|
||||||
|
doc = new SolrDocument();
|
||||||
|
DigestURL url = new DigestURL("http://localhost/page.html");
|
||||||
|
doc.addField(CollectionSchema.id.name(), ASCII.String(url.hash()));
|
||||||
|
doc.addField(CollectionSchema.sku.name(),url.toString());
|
||||||
|
// for testcases add other fields
|
||||||
|
// fields involved in snippet extraction:
|
||||||
|
// url, title, keywords, author, text_t
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTextSnippet() {
|
||||||
|
|
||||||
|
URIMetadataNode testpage = new URIMetadataNode(doc);
|
||||||
|
testpage.addField(CollectionSchema.title.name(), "New test case");
|
||||||
|
testpage.addField(CollectionSchema.keywords.name(), "junit");
|
||||||
|
testpage.addField(CollectionSchema.author.name(), "test author");
|
||||||
|
testpage.addField(CollectionSchema.text_t.name(), "A new testcase has been introduced. "
|
||||||
|
+ "It includes a few test lines and one line that should match.");
|
||||||
|
|
||||||
|
String querywords = "testcase line";
|
||||||
|
QueryGoal qg = new QueryGoal(querywords);
|
||||||
|
HandleSet queryhashes = qg.getIncludeHashes();
|
||||||
|
|
||||||
|
TextSnippet ts = new TextSnippet(
|
||||||
|
null,
|
||||||
|
testpage,
|
||||||
|
queryhashes,
|
||||||
|
cacheStrategy,
|
||||||
|
pre,
|
||||||
|
snippetMaxLength,
|
||||||
|
reindexing
|
||||||
|
);
|
||||||
|
String rstr = ts.getError();
|
||||||
|
assertEquals("testTextSnippet Error Code: ", "", rstr);
|
||||||
|
|
||||||
|
String[] wordlist = querywords.split(" ");
|
||||||
|
rstr = ts.toString();
|
||||||
|
System.out.println("testTextSnippet: query=" + querywords);
|
||||||
|
System.out.println("testTextSnippet: snippet=" + rstr);
|
||||||
|
// check words included in snippet
|
||||||
|
for (String word : wordlist) {
|
||||||
|
assertTrue("testTextSnippet word included " + word, rstr.contains(word));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of getLineMarked method, of class TextSnippet.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGetLineMarked() {
|
||||||
|
URIMetadataNode testpage = new URIMetadataNode(doc);
|
||||||
|
testpage.addField(CollectionSchema.title.name(), "New test case");
|
||||||
|
testpage.addField(CollectionSchema.keywords.name(), "junit");
|
||||||
|
testpage.addField(CollectionSchema.author.name(), "test author");
|
||||||
|
testpage.addField(CollectionSchema.text_t.name(),
|
||||||
|
"A new testcase has been introduced. "
|
||||||
|
+ "It includes a few test lines and one line that should match.");
|
||||||
|
|
||||||
|
String querywords = "testcase line";
|
||||||
|
QueryGoal qg = new QueryGoal(querywords);
|
||||||
|
HandleSet queryhashes = qg.getIncludeHashes();
|
||||||
|
|
||||||
|
TextSnippet ts = new TextSnippet(
|
||||||
|
null,
|
||||||
|
testpage,
|
||||||
|
queryhashes,
|
||||||
|
cacheStrategy,
|
||||||
|
pre,
|
||||||
|
snippetMaxLength,
|
||||||
|
reindexing
|
||||||
|
);
|
||||||
|
|
||||||
|
String rstr = ts.getError();
|
||||||
|
assertEquals("testGetLineMarked Error Code: ", "", rstr);
|
||||||
|
|
||||||
|
// check words marked in snippet
|
||||||
|
rstr = ts.getLineMarked(qg);
|
||||||
|
System.out.println("testGetLineMarked: query=" + querywords);
|
||||||
|
System.out.println("testGetLineMarked: snippet=" + rstr);
|
||||||
|
String[] wordlist = querywords.split(" ");
|
||||||
|
for (String wordstr : wordlist) {
|
||||||
|
assertTrue("testGetLineMarked marked word " + wordstr, rstr.contains("<b>" + wordstr + "</b>"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user