mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git
This commit is contained in:
commit
972d411d99
|
@ -57,7 +57,9 @@ public class DictionaryLoader_p {
|
|||
prop.put(dictionary.nickname + "ActionDeactivated", 0);
|
||||
}
|
||||
|
||||
if (post == null) return prop;
|
||||
if (post == null) {
|
||||
return prop;
|
||||
}
|
||||
|
||||
// GEON0
|
||||
if (post.containsKey("geon0Load")) {
|
||||
|
@ -67,6 +69,7 @@ public class DictionaryLoader_p {
|
|||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
|
||||
LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
|
||||
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
|
||||
prop.put("geon0ActionLoaded", 1);
|
||||
} catch (final MalformedURLException e) {
|
||||
|
@ -96,6 +99,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geon0Activate")) {
|
||||
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
|
||||
LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
|
||||
prop.put("geon0ActionActivated", 1);
|
||||
}
|
||||
|
||||
|
@ -108,6 +112,7 @@ public class DictionaryLoader_p {
|
|||
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
|
||||
LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
|
||||
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
|
||||
prop.put("geo1ActionLoaded", 1);
|
||||
} catch (final MalformedURLException e) {
|
||||
|
@ -137,6 +142,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geo1Activate")) {
|
||||
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
|
||||
LibraryProvider.autotagging.addLocalization(LibraryProvider.geoLoc);
|
||||
prop.put("geo1ActionActivated", 1);
|
||||
}
|
||||
|
||||
|
|
238
source/net/yacy/document/Autotagging.java
Normal file
238
source/net/yacy/document/Autotagging.java
Normal file
|
@ -0,0 +1,238 @@
|
|||
/**
|
||||
* Autotagging
|
||||
* Copyright 2012 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 07.01.2012 on http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document;
|
||||
|
||||
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import net.yacy.document.geolocalization.Localization;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
|
||||
|
||||
/**
|
||||
* Autotagging provides a set of tag/print-name properties which can be used to
|
||||
* - create tags from texts automatically
|
||||
* - create navigation entries for given tags
|
||||
*/
|
||||
public class Autotagging {
|
||||
|
||||
final static Object PRESENT = new Object();
|
||||
|
||||
final char prefixChar;
|
||||
final File autotaggingPath;
|
||||
final Map<String, Vocabulary> vocabularies;
|
||||
final Map<String, Object> allTags;
|
||||
|
||||
public Autotagging(final File autotaggingPath, char prefixChar) {
|
||||
this.vocabularies = new ConcurrentHashMap<String, Vocabulary>();
|
||||
this.autotaggingPath = autotaggingPath;
|
||||
this.prefixChar = prefixChar;
|
||||
this.allTags = new ConcurrentHashMap<String, Object>();
|
||||
reload();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* scan the input directory and load all tag tables (again)
|
||||
* a tag table is a property file where
|
||||
* the key is the tag name
|
||||
* the value is the visible name for the tag (shown in a navigator)
|
||||
* properties without values are allowed (the value is then set to the key)
|
||||
* also the value can be used as a tag
|
||||
*/
|
||||
public void reload() {
|
||||
this.vocabularies.clear();
|
||||
this.allTags.clear();
|
||||
if (this.autotaggingPath == null || !this.autotaggingPath.exists()) {
|
||||
return;
|
||||
}
|
||||
final String[] files = this.autotaggingPath.list();
|
||||
for (final String f: files) {
|
||||
if (f.endsWith(".vocabulary")) {
|
||||
try {
|
||||
File ff = new File(this.autotaggingPath, f);
|
||||
String vocName = ff.getName();
|
||||
vocName = vocName.substring(0, vocName.length() - 11);
|
||||
Vocabulary voc = new Vocabulary(vocName, ff);
|
||||
this.vocabularies.put(vocName, voc);
|
||||
for (String t: voc.tags()) {
|
||||
this.allTags.put(t, PRESENT);
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public void addDidYouMean(WordCache wordCache) {
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
public void addLocalization(Localization localization) {
|
||||
Vocabulary voc = new Vocabulary("Locale", localization);
|
||||
this.vocabularies.put("Locale", voc);
|
||||
for (String t: voc.tags()) {
|
||||
this.allTags.put(t, PRESENT);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* produce a set of tags for a given text.
|
||||
* The set contains the names of the tags with a prefix character at the front
|
||||
* @param text
|
||||
* @return
|
||||
*/
|
||||
public Set<String> tags(String text) {
|
||||
Set<String> as = new HashSet<String>();
|
||||
|
||||
return as;
|
||||
}
|
||||
|
||||
public static class Vocabulary {
|
||||
|
||||
final String navigatorName;
|
||||
final Map<String, String> tag2print, print2tag;
|
||||
|
||||
public Vocabulary(String name) {
|
||||
this.navigatorName = name;
|
||||
this.tag2print = new ConcurrentHashMap<String, String>();
|
||||
this.print2tag = new ConcurrentHashMap<String, String>();
|
||||
}
|
||||
|
||||
public Vocabulary(String name, File propFile) throws IOException {
|
||||
this(name);
|
||||
ArrayList<String> list = FileUtils.getListArray(propFile);
|
||||
String k, v;
|
||||
String[] tags;
|
||||
int p;
|
||||
vocloop: for (String line: list) {
|
||||
line = line.trim();
|
||||
if (line.length() == 0 || line.charAt(0) == '#') {
|
||||
continue vocloop;
|
||||
}
|
||||
p = line.indexOf(':');
|
||||
if (p < 0) {
|
||||
p = line.indexOf('=');
|
||||
}
|
||||
if (p < 0) {
|
||||
p = line.indexOf('\t');
|
||||
}
|
||||
if (p < 0) {
|
||||
this.tag2print.put(line, line);
|
||||
this.print2tag.put(line, line);
|
||||
continue vocloop;
|
||||
}
|
||||
k = line.substring(0, p).trim();
|
||||
v = line.substring(p + 1);
|
||||
tags = v.split(",");
|
||||
tagloop: for (String t: tags) {
|
||||
t = t.trim().toLowerCase();
|
||||
if (t.length() == 0) {
|
||||
continue tagloop;
|
||||
}
|
||||
this.tag2print.put(t, k);
|
||||
this.print2tag.put(k, t);
|
||||
}
|
||||
this.tag2print.put(k.toLowerCase(), k);
|
||||
this.print2tag.put(k, k.toLowerCase());
|
||||
}
|
||||
}
|
||||
|
||||
public Vocabulary(String name, Localization localization) {
|
||||
this(name);
|
||||
Set<String> locNames = localization.locationNames();
|
||||
for (String loc: locNames) {
|
||||
this.tag2print.put(loc.toLowerCase(), loc);
|
||||
this.print2tag.put(loc, loc.toLowerCase());
|
||||
}
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return this.navigatorName;
|
||||
}
|
||||
|
||||
public String getPrint(final String tag) {
|
||||
return this.tag2print.get(tag);
|
||||
}
|
||||
|
||||
public String getTag(final String print) {
|
||||
return this.print2tag.get(print);
|
||||
}
|
||||
|
||||
public Set<String> tags() {
|
||||
return this.tag2print.keySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.print2tag.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public class Metatag {
|
||||
private final String vocName;
|
||||
private final String print;
|
||||
public Metatag(String vocName, String print) {
|
||||
this.vocName = vocName;
|
||||
this.print = print;
|
||||
}
|
||||
public Metatag(String metatag) {
|
||||
assert metatag.charAt(0) == Autotagging.this.prefixChar;
|
||||
int p = metatag.indexOf(':');
|
||||
assert p > 0;
|
||||
this.vocName = metatag.substring(1, p);
|
||||
this.print = metatag.substring(p + 1);
|
||||
}
|
||||
public String getVocabularyName() {
|
||||
return this.vocName;
|
||||
}
|
||||
public String getPrintName() {
|
||||
return this.print;
|
||||
}
|
||||
public String getMetatag() {
|
||||
return Autotagging.this.prefixChar + this.vocName + ":" + this.print.replaceAll(" ", "_");
|
||||
}
|
||||
}
|
||||
|
||||
public Metatag metatag(String vocName, String print) {
|
||||
return new Metatag(vocName, print);
|
||||
}
|
||||
|
||||
public Metatag metatag(String metatag) {
|
||||
return new Metatag(metatag);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
|
||||
for (Map.Entry<String, Vocabulary> entry: a.vocabularies.entrySet()) {
|
||||
System.out.println(entry);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -48,12 +48,15 @@ import net.yacy.kelondro.util.FileUtils;
|
|||
public class LibraryProvider
|
||||
{
|
||||
|
||||
private static final String path_to_source_dictionaries = "source";
|
||||
private static final String path_to_did_you_mean_dictionaries = "didyoumean";
|
||||
public static final char tagPrefix = '$';
|
||||
public static final String path_to_source_dictionaries = "source";
|
||||
public static final String path_to_did_you_mean_dictionaries = "didyoumean";
|
||||
public static final String path_to_autotagging_dictionaries = "autotagging";
|
||||
|
||||
public static final String disabledExtension = ".disabled";
|
||||
|
||||
public static WordCache dymLib = new WordCache(null);
|
||||
public static Autotagging autotagging = new Autotagging(null, tagPrefix);
|
||||
public static OverarchingLocalization geoLoc = new OverarchingLocalization();
|
||||
private static File dictSource = null;
|
||||
private static File dictRoot = null;
|
||||
|
@ -91,7 +94,7 @@ public class LibraryProvider
|
|||
* initialize the LibraryProvider as static class. This assigns default paths, and initializes the
|
||||
* dictionary classes Additionally, if default dictionaries are given in the source path, they are
|
||||
* translated into the input format inside the DATA/DICTIONARIES directory
|
||||
*
|
||||
*
|
||||
* @param pathToSource
|
||||
* @param pathToDICTIONARIES
|
||||
*/
|
||||
|
@ -107,6 +110,8 @@ public class LibraryProvider
|
|||
initDidYouMean();
|
||||
integrateOpenGeoDB();
|
||||
integrateGeonames();
|
||||
initAutotagging(tagPrefix);
|
||||
autotagging.addLocalization(geoLoc);
|
||||
}
|
||||
|
||||
public static void integrateOpenGeoDB() {
|
||||
|
@ -141,6 +146,14 @@ public class LibraryProvider
|
|||
dymLib = new WordCache(dymDict);
|
||||
}
|
||||
|
||||
public static void initAutotagging(char prefix) {
|
||||
final File autotaggingPath = new File(dictRoot, path_to_autotagging_dictionaries);
|
||||
if ( !autotaggingPath.exists() ) {
|
||||
autotaggingPath.mkdirs();
|
||||
}
|
||||
autotagging = new Autotagging(autotaggingPath, prefix);
|
||||
}
|
||||
|
||||
public static void removeDeReWo() {
|
||||
final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
|
||||
final File derewoInput = LibraryProvider.Dictionary.DRW0.file();
|
||||
|
|
|
@ -165,6 +165,20 @@ public class GeonamesLocalization implements Localization
|
|||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* produce a set of location names
|
||||
* @return a set of names
|
||||
*/
|
||||
@Override
|
||||
public Set<String> locationNames() {
|
||||
Set<String> locations = new HashSet<String>();
|
||||
Set<StringBuilder> l = this.name2ids.keySet();
|
||||
for (StringBuilder s: l) {
|
||||
locations.add(s.toString());
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> recommend(final String s) {
|
||||
final Set<String> a = new HashSet<String>();
|
||||
|
|
|
@ -47,12 +47,19 @@ public interface Localization {
|
|||
*/
|
||||
public TreeSet<Location> find(String anyname, boolean locationexact);
|
||||
|
||||
/**
|
||||
* produce a set of location names
|
||||
* @return a set of names
|
||||
*/
|
||||
public Set<String> locationNames();
|
||||
|
||||
/**
|
||||
* recommend a set of names according to a given name
|
||||
* @param s a possibly partially matching name
|
||||
* @return a set of names that match with the given name using the local dictionary of names
|
||||
*/
|
||||
public Set<String> recommend(String s);
|
||||
|
||||
/**
|
||||
* recommend a set of names according to a given name
|
||||
* @param s a possibly partially matching name
|
||||
|
@ -70,6 +77,7 @@ public interface Localization {
|
|||
* hashCode that must be used to distinguish localization services in hash sets
|
||||
* @return the hash code, may be derived from the nickname
|
||||
*/
|
||||
@Override
|
||||
public int hashCode();
|
||||
|
||||
/**
|
||||
|
@ -77,5 +85,6 @@ public interface Localization {
|
|||
* @param other
|
||||
* @return true if both objects are localization services and have the same nickname
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object other);
|
||||
}
|
||||
|
|
|
@ -195,7 +195,7 @@ public class OpenGeoDBLocalization implements Localization
|
|||
/**
|
||||
* check database tables against occurrences of this entity the anyname - String may be one of: - name of
|
||||
* a town, villa, region etc - zip code - telephone prefix - kfz sign
|
||||
*
|
||||
*
|
||||
* @param anyname
|
||||
* @return
|
||||
*/
|
||||
|
@ -241,9 +241,23 @@ public class OpenGeoDBLocalization implements Localization
|
|||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* produce a set of location names
|
||||
* @return a set of names
|
||||
*/
|
||||
@Override
|
||||
public Set<String> locationNames() {
|
||||
Set<String> locations = new HashSet<String>();
|
||||
Set<StringBuilder> l = this.name2ids.keySet();
|
||||
for (StringBuilder s: l) {
|
||||
locations.add(s.toString());
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
/**
|
||||
* read the dictionary and construct a set of recommendations to a given string
|
||||
*
|
||||
*
|
||||
* @param s input value that is used to match recommendations
|
||||
* @return a set that contains all words that start with the input value
|
||||
*/
|
||||
|
|
|
@ -56,6 +56,11 @@ public class OverarchingLocalization implements Localization {
|
|||
this.services.remove(nickname);
|
||||
}
|
||||
|
||||
/**
|
||||
* the number of locations that this localization stores
|
||||
* @return the number of locations
|
||||
*/
|
||||
@Override
|
||||
public int locations() {
|
||||
int locations = 0;
|
||||
for (final Localization service: this.services.values()) {
|
||||
|
@ -65,8 +70,12 @@ public class OverarchingLocalization implements Localization {
|
|||
}
|
||||
|
||||
/**
|
||||
* find (a set of) locations
|
||||
* find a location by name
|
||||
* @param anyname - a name of a location
|
||||
* @param locationexact - if true, then only exact matched with the location are returned. if false also partially matching names
|
||||
* @return a set of locations, ordered by population (if this information is given)
|
||||
*/
|
||||
@Override
|
||||
public TreeSet<Location> find(final String anyname, final boolean locationexact) {
|
||||
final TreeSet<Location> locations = new TreeSet<Location>();
|
||||
for (final Localization service: this.services.values()) {
|
||||
|
@ -76,36 +85,80 @@ public class OverarchingLocalization implements Localization {
|
|||
}
|
||||
|
||||
/**
|
||||
* recommend location names
|
||||
* produce a set of location names
|
||||
* @return a set of names
|
||||
*/
|
||||
@Override
|
||||
public Set<String> locationNames() {
|
||||
final Set<String> locations = new HashSet<String>();
|
||||
for (final Localization service: this.services.values()) {
|
||||
locations.addAll(service.locationNames());
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
/**
|
||||
* recommend a set of names according to a given name
|
||||
* @param s a possibly partially matching name
|
||||
* @return a set of names that match with the given name using the local dictionary of names
|
||||
*/
|
||||
@Override
|
||||
public Set<String> recommend(final String s) {
|
||||
final Set<String> recommendations = new HashSet<String>();
|
||||
if (s.length() == 0) return recommendations;
|
||||
if (s.length() == 0) {
|
||||
return recommendations;
|
||||
}
|
||||
for (final Localization service: this.services.values()) {
|
||||
recommendations.addAll(service.recommend(s));
|
||||
}
|
||||
return recommendations;
|
||||
}
|
||||
|
||||
/**
|
||||
* recommend a set of names according to a given name
|
||||
* @param s a possibly partially matching name
|
||||
* @return a set of names that match with the given name using the local dictionary of names
|
||||
*/
|
||||
@Override
|
||||
public Set<StringBuilder> recommend(final StringBuilder s) {
|
||||
final Set<StringBuilder> recommendations = new HashSet<StringBuilder>();
|
||||
if (s.length() == 0) return recommendations;
|
||||
if (s.length() == 0) {
|
||||
return recommendations;
|
||||
}
|
||||
for (final Localization service: this.services.values()) {
|
||||
recommendations.addAll(service.recommend(s));
|
||||
}
|
||||
return recommendations;
|
||||
}
|
||||
|
||||
/**
|
||||
* return an nickname of the localization service
|
||||
* @return the nickname
|
||||
*/
|
||||
@Override
|
||||
public String nickname() {
|
||||
return "oa";
|
||||
}
|
||||
|
||||
/**
|
||||
* hashCode that must be used to distinguish localization services in hash sets
|
||||
* @return the hash code, may be derived from the nickname
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return nickname().hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* compare localization services; to be used for hash sets with localization services
|
||||
* @param other
|
||||
* @return true if both objects are localization services and have the same nickname
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
if (!(other instanceof Localization)) return false;
|
||||
if (!(other instanceof Localization)) {
|
||||
return false;
|
||||
}
|
||||
return nickname().equals(((Localization) other).nickname());
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user