From 54a48b4184b2305045b7ecc05e6135def0d1647e Mon Sep 17 00:00:00 2001 From: apfelmaennchen Date: Fri, 12 Jun 2009 20:36:03 +0000 Subject: [PATCH] - added "did you mean" to search page - currently works for single word queries only! git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6057 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearch.html | 4 ++ htroot/yacysearch.java | 37 +++++++++++- source/de/anomic/tools/DidYouMean.java | 83 ++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 source/de/anomic/tools/DidYouMean.java diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index c21d1544b..a32766900 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -94,6 +94,10 @@ $(function() { + #(didYouMean)# + :: +

Did you mean: #{suggestions}# <a href="#[url]#">#[word]#</a> #[sep]##{/suggestions}#

+ #(/didYouMean)# #(searchagain)# :: diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index ed967a4e9..1245c0a8a 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.util.HashMap; +import java.util.Iterator; import java.util.TreeSet; import de.anomic.content.RSSMessage; @@ -53,6 +54,7 @@ import de.anomic.server.serverDomains; import de.anomic.server.serverObjects; import de.anomic.server.serverProfiling; import de.anomic.server.serverSwitch; +import de.anomic.tools.DidYouMean; import de.anomic.tools.iso639; import de.anomic.tools.Formatter; import de.anomic.xml.RSSFeed; @@ -85,7 +87,7 @@ public class yacysearch { String querystring = originalquerystring; boolean fetchSnippets = (post != null && post.get("verify", "false").equals("true")); final serverObjects prop = new serverObjects(); - + //final boolean rss = (post == null) ? false : post.get("rss", "false").equals("true"); prop.put("promoteSearchPageGreeting", promoteSearchPageGreeting); prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(plasmaSwitchboardConstants.GREETING_HOMEPAGE, "")); @@ -465,6 +467,37 @@ public class yacysearch { theQuery.urlretrievaltime = theSearch.getURLRetrievalTime(); theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime(); sb.localSearches.add(theQuery); + + // check suggestions + DidYouMean didYouMean = new DidYouMean(sb); + Iterator meanIt = didYouMean.getSuggestion(querystring).iterator(); + int meanCount = 0; + String suggestion; + prop.put("didYouMean", 0); + while(meanIt.hasNext()) { + suggestion = meanIt.next(); + prop.put("didYouMean_suggestions_"+meanCount+"_word", suggestion); + prop.put("didYouMean_suggestions_"+meanCount+"_url", + "/yacysearch.html" + "?display=" + display + + "&search=" + suggestion + + "&maximumRecords="+ theQuery.displayResults() + + "&startRecord=" + (0 * theQuery.displayResults()) + + "&resource=" + ((theQuery.isLocal()) ? 
"local" : "global") + + "&verify=" + ((theQuery.onlineSnippetFetch) ? "true" : "false") + + "&nav=" + theQuery.navigators + + "&urlmaskfilter=" + originalUrlMask + + "&prefermaskfilter=" + theQuery.prefer + + "&cat=href&constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) + + "&contentdom=" + theQuery.contentdom() + + "&former=" + theQuery.queryString(true) + ); + prop.put("didYouMean_suggestions_"+meanCount+"_sep","|"); + meanCount++; + } + prop.put("didYouMean_suggestions_"+(meanCount-1)+"_sep",""); + if(meanCount > 0) + prop.put("didYouMean", 1); + prop.put("didYouMean_suggestions", meanCount); // update the search tracker try { @@ -580,7 +613,7 @@ public class yacysearch { // for RSS: don't HTML encode some elements prop.putXML("rss_query", originalquerystring); prop.put("rss_queryenc", originalquerystring.replace(' ', '+')); - + sb.localSearchLastAccess = System.currentTimeMillis(); // return rewrite properties diff --git a/source/de/anomic/tools/DidYouMean.java b/source/de/anomic/tools/DidYouMean.java new file mode 100644 index 000000000..6a7d23ee2 --- /dev/null +++ b/source/de/anomic/tools/DidYouMean.java @@ -0,0 +1,83 @@ +package de.anomic.tools; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.parser.Word; + +// People make mistakes when they type words. +// The most common mistakes are the four categories listed below: +// (1) Changing one letter: bat / cat; +// (2) Adding one letter: bat / boat; +// (3) Deleting one letter: frog / fog; or +// (4) Reversing two consecutive letters: two / tow. 
+ +public class DidYouMean { + + private static char[] alphabet = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p', + 'q','r','s','t','u','v','w','x','y','z','ä','ö','ü','ß'}; + private final Set set; + private String word; + private int len; + private final plasmaSwitchboard sb; + + public DidYouMean(final plasmaSwitchboard env) { + this.set = new HashSet(); + this.word = ""; + this.len = 0; + this.sb = env; + } + + public Set getSuggestion(String word) { + this.word = word.toLowerCase(); + this.len = word.length(); + ChangingOneLetter(); + AddingOneLetter(); + DeletingOneLetter(); + ReversingTwoConsecutiveLetters(); + Iterator it = this.set.iterator(); + String s; + final HashSet rset = new HashSet(); + while(it.hasNext()) { + s = it.next(); + if(sb.indexSegment.termIndex().has(Word.word2hash(s))) { + rset.add(s); + } + } + rset.remove(word.toLowerCase()); + return rset; + } + + private void ChangingOneLetter() { + for(int i=0; i