- added "did you mean" to search page

- currently works for single word queries only!

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6057 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
apfelmaennchen 2009-06-12 20:36:03 +00:00
parent 31360ba40c
commit 54a48b4184
3 changed files with 122 additions and 2 deletions

View File

@ -94,6 +94,10 @@ $(function() {
<input type="hidden" name="constraint" value="#[constraint]#" />
</fieldset>
</form>
#(didYouMean)#
::
<p><b>Did you mean:</b> #{suggestions}# <a href="#[url]#">#[word]#</a> #[sep]##{/suggestions}#</p>
#(/didYouMean)#
#(searchagain)#
::

View File

@ -29,6 +29,7 @@
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.TreeSet;
import de.anomic.content.RSSMessage;
@ -53,6 +54,7 @@ import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
import de.anomic.tools.DidYouMean;
import de.anomic.tools.iso639;
import de.anomic.tools.Formatter;
import de.anomic.xml.RSSFeed;
@ -85,7 +87,7 @@ public class yacysearch {
String querystring = originalquerystring;
boolean fetchSnippets = (post != null && post.get("verify", "false").equals("true"));
final serverObjects prop = new serverObjects();
//final boolean rss = (post == null) ? false : post.get("rss", "false").equals("true");
prop.put("promoteSearchPageGreeting", promoteSearchPageGreeting);
prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(plasmaSwitchboardConstants.GREETING_HOMEPAGE, ""));
@ -465,6 +467,37 @@ public class yacysearch {
theQuery.urlretrievaltime = theSearch.getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime();
sb.localSearches.add(theQuery);
// check suggestions
DidYouMean didYouMean = new DidYouMean(sb);
Iterator<String> meanIt = didYouMean.getSuggestion(querystring).iterator();
int meanCount = 0;
String suggestion;
prop.put("didYouMean", 0);
while(meanIt.hasNext()) {
suggestion = meanIt.next();
prop.put("didYouMean_suggestions_"+meanCount+"_word", suggestion);
prop.put("didYouMean_suggestions_"+meanCount+"_url",
"/yacysearch.html" + "?display=" + display +
"&search=" + suggestion +
"&maximumRecords="+ theQuery.displayResults() +
"&startRecord=" + (0 * theQuery.displayResults()) +
"&resource=" + ((theQuery.isLocal()) ? "local" : "global") +
"&verify=" + ((theQuery.onlineSnippetFetch) ? "true" : "false") +
"&nav=" + theQuery.navigators +
"&urlmaskfilter=" + originalUrlMask +
"&prefermaskfilter=" + theQuery.prefer +
"&cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
"&contentdom=" + theQuery.contentdom() +
"&former=" + theQuery.queryString(true)
);
prop.put("didYouMean_suggestions_"+meanCount+"_sep","|");
meanCount++;
}
prop.put("didYouMean_suggestions_"+(meanCount-1)+"_sep","");
if(meanCount > 0)
prop.put("didYouMean", 1);
prop.put("didYouMean_suggestions", meanCount);
// update the search tracker
try {
@ -580,7 +613,7 @@ public class yacysearch {
// for RSS: don't HTML encode some elements
prop.putXML("rss_query", originalquerystring);
prop.put("rss_queryenc", originalquerystring.replace(' ', '+'));
sb.localSearchLastAccess = System.currentTimeMillis();
// return rewrite properties

View File

@ -0,0 +1,83 @@
package de.anomic.tools;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.parser.Word;
/**
 * Produces "did you mean" spelling suggestions for a single word.
 *
 * People make mistakes when they type words. The most common mistakes fall
 * into the four categories listed below; for each category every possible
 * variation of the input word is generated as a candidate:
 * <ol>
 *   <li>Changing one letter: bat / cat</li>
 *   <li>Adding one letter: bat / boat</li>
 *   <li>Deleting one letter: frog / fog</li>
 *   <li>Reversing two consecutive letters: two / tow</li>
 * </ol>
 * Only candidates for which the switchboard's term index reports an entry
 * are returned as suggestions.
 */
public class DidYouMean {

    // Letters used for substitutions and insertions: a-z plus the German
    // letters ä, ö, ü, ß. The non-ASCII letters are written as unicode escapes
    // so the result does not depend on the source encoding used at compile time.
    private static final char[] ALPHABET = {
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
        'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\u00e4', '\u00f6', '\u00fc', '\u00df'
    };

    // candidate words generated for the word currently being processed
    private final Set<String> set;
    // lower-cased word currently being processed
    private String word;
    // length of the lower-cased word
    private int len;
    // switchboard whose term index is used to verify candidates
    private final plasmaSwitchboard sb;

    /**
     * @param env the switchboard whose index segment is queried for candidates
     */
    public DidYouMean(final plasmaSwitchboard env) {
        this.set = new HashSet<String>();
        this.word = "";
        this.len = 0;
        this.sb = env;
    }

    /**
     * Computes suggestions for the given word.
     *
     * @param word the (single) word as typed by the user; it is lower-cased
     *             before candidates are generated
     * @return a new set with all generated candidates that are present in the
     *         term index, never containing the lower-cased input word itself;
     *         empty if {@code word} is {@code null} or empty
     */
    public Set<String> getSuggestion(String word) {
        final Set<String> result = new HashSet<String>();
        if (word == null || word.length() == 0) {
            // nothing to vary; also avoids suggesting arbitrary single letters
            return result;
        }

        // drop candidates left over from a previous call on this instance
        this.set.clear();
        this.word = word.toLowerCase();
        // measure the lower-cased form, since that is what gets sliced below
        this.len = this.word.length();

        changingOneLetter();
        addingOneLetter();
        deletingOneLetter();
        reversingTwoConsecutiveLetters();

        // keep only candidates the term index knows about
        for (final String candidate : this.set) {
            if (sb.indexSegment.termIndex().has(Word.word2hash(candidate))) {
                result.add(candidate);
            }
        }
        // substituting a letter by itself regenerates the input word
        result.remove(this.word);
        return result;
    }

    /** Adds every word obtained by replacing one letter with a letter of the alphabet. */
    private void changingOneLetter() {
        for (int i = 0; i < this.len; i++) {
            final String head = this.word.substring(0, i);
            final String tail = this.word.substring(i + 1);
            for (final char c : ALPHABET) {
                this.set.add(head + c + tail);
            }
        }
    }

    /** Adds every word obtained by removing one letter. */
    private void deletingOneLetter() {
        for (int i = 0; i < this.len; i++) {
            this.set.add(this.word.substring(0, i) + this.word.substring(i + 1));
        }
    }

    /** Adds every word obtained by inserting one letter of the alphabet at any position. */
    private void addingOneLetter() {
        // i == len is included so that a letter can also be appended at the end
        for (int i = 0; i <= this.len; i++) {
            final String head = this.word.substring(0, i);
            final String tail = this.word.substring(i);
            for (final char c : ALPHABET) {
                this.set.add(head + c + tail);
            }
        }
    }

    /** Adds every word obtained by swapping two neighbouring letters. */
    private void reversingTwoConsecutiveLetters() {
        for (int i = 0; i < this.len - 1; i++) {
            this.set.add(this.word.substring(0, i)
                    + this.word.charAt(i + 1)
                    + this.word.charAt(i)
                    + this.word.substring(i + 2));
        }
    }
}