- fix for snippets that are too short

- added keyword to snippet fetch to suppress removal of not-found snippet words (for debugging)


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3009 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2006-11-25 00:38:09 +00:00
parent f7176d2f6f
commit b5a29e9651
3 changed files with 7 additions and 4 deletions

View File

@ -13,7 +13,7 @@ function AllSnippets() {
// Requests the snippet for one search result from /xml/snippet.xml.
//   url   - address of the result document; sent escaped as the 'url' parameter
//   query - the search string; sent escaped as the 'search' parameter
// The response is not returned here: every readyState change is forwarded to
// handleState(request).
function requestSnippet(url, query){
// presumably an XMLHttpRequest (or equivalent) -- verify in createRequestObject()
var request=createRequestObject();
// NOTE(review): two open() calls appear back to back. This looks like a diff
// rendering that lost its +/- markers, in which case only the second call
// (the one that appends '&remove=true') belongs in the current file -- confirm.
request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&search=' + escape(query),true);
// third argument 'true' makes the request asynchronous
request.open('get', '/xml/snippet.xml?url=' + escape(url) + '&search=' + escape(query) + '&remove=true',true);
// the closure keeps this particular request object available to the handler
request.onreadystatechange = function () {handleState(request)};
// GET request: no body is sent
request.send(null);
}

View File

@ -25,6 +25,9 @@ public class snippet {
String urlString = post.get("url", "");
URL url = new URL(urlString);
// if 'remove' is set to true, then RWI references to URLs that do not have the snippet are removed
boolean remove = post.get("remove", "false").equals("true");
String querystring = post.get("search", "").trim();
if ((querystring.length() > 2) && (querystring.charAt(0) == '"') && (querystring.charAt(querystring.length() - 1) == '"')) {
querystring = querystring.substring(1, querystring.length() - 1).trim();
@ -47,7 +50,7 @@ public class snippet {
prop.put("text", (snippet.exists()) ? "<![CDATA["+snippet.getLineMarked(queryHashes)+"]]>" : "unknown");
} else {
String error = snippet.getError();
if (error.equals("no matching snippet found")) {
if ((remove) && (error.equals("no matching snippet found"))) {
switchboard.removeReferences(plasmaURL.urlHash(url), query);
}
prop.put("text", error);

View File

@ -269,7 +269,7 @@ public class plasmaSnippetCache {
* COMPUTE SNIPPET
* =========================================================================== */
// we have found a parseable non-empty file: use the lines
line = computeSnippet(sentences, queryhashes, 8 + 6 * queryhashes.size(), snippetMaxLength);
line = computeSnippet(sentences, queryhashes, 3 * queryhashes.size(), snippetMaxLength);
//System.out.println("loaded snippet for URL " + url + ": " + line);
if (line == null) return new Snippet(null, ERROR_NO_MATCH, "no matching snippet found");
if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength);
@ -378,7 +378,7 @@ public class plasmaSnippetCache {
String sentence;
while (sentences.hasMoreElements()) {
sentence = (String) sentences.nextElement();
//System.out.println("Sentence " + i + ": " + sentences[i]);
//System.out.println("Snippet-Sentence :" + sentence); // DEBUG
if (sentence.length() > minLength) {
hs = hashSentence(sentence);
j = queryhashes.iterator();