Vocabularies are now also used as a source for the did-you-mean computation

This commit is contained in:
Michael Christen 2012-01-08 02:13:52 +01:00
parent eaec14ecc4
commit fa8da7f89d
3 changed files with 14 additions and 0 deletions

View File

@ -89,6 +89,10 @@ public class Autotagging {
}
}
/**
 * Returns the key set of this object's {@code allTags} field.
 *
 * NOTE(review): if {@code allTags} is a {@code java.util.Map}, the returned set is
 * presumably a live view backed by that map; callers that need a stable snapshot
 * should copy it before the tag collection is modified -- confirm against the
 * field declaration.
 *
 * @return the set of keys currently held in {@code allTags}
 */
public Set<String> allTags() {
    final Set<String> tagNames = this.allTags.keySet();
    return tagNames;
}
public void addDictionaries(Map<String, Dictionary> dictionaries) {
for (Map.Entry<String, Dictionary> entry: dictionaries.entrySet()) {
Vocabulary voc = new Vocabulary(entry.getKey(), entry.getValue());

View File

@ -32,6 +32,7 @@ import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
@ -111,8 +112,11 @@ public class LibraryProvider
integrateOpenGeoDB();
integrateGeonames();
initAutotagging(tagPrefix);
Set<String> allTags = new HashSet<String>() ;
allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity
autotagging.addLocalization(geoLoc);
autotagging.addDictionaries(dymLib.getDictionaries());
WordCache.learn(allTags);
}
public static void integrateOpenGeoDB() {

View File

@ -220,6 +220,12 @@ public class WordCache {
}
}
/**
 * Feeds every word of the given set into the single-word {@code learn} overload.
 * Each word is wrapped in a fresh {@link StringBuilder} before it is handed over.
 *
 * @param wordset the words to learn; iterated once in the set's own iteration order
 */
public static void learn(Set<String> wordset) {
    for (final String word : wordset) {
        final StringBuilder buffer = new StringBuilder(word);
        learn(buffer);
    }
}
/**
* scan the input directory and load all dictionaries (again)
*/