added another geolocalization data source: GeoNames

- added downloader option in DictionaryLoader
- added generalization (interfaces and overarching localization)
- more abstraction using the libraries

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6879 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2010-05-15 23:49:30 +00:00
parent c9862e0ca9
commit e43e61e502
14 changed files with 575 additions and 151 deletions

View File

@ -1,4 +1,4 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Dictionary Loader</title>
@ -18,8 +18,46 @@
<form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data">
<fieldset>
<legend>Geolocalization</legend>
The geolocalization file will enable YaCy to present locations from OpenStreetMap according to given search words.
With this file it is possible to find locations using the location (city) name, a zip code, a car sign or a telephone pre-dial number.
Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
<h4>GeoNames</h4>
<p>With this file it is possible to find cities with a population > 1000 all over the world.</p>
<dl>
<dt><label>Download from</label></dt>
<dd>#[geon0URL]#</dd>
<dt><label>Storage location</label></dt>
<dd>#[geon0Storage]#</dd>
<dt><label>Status</label></dt>
<dd>#(geon0Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::de-activated#(/geon0Status)#</dd>
<dt></dt>
<dd>#(geon0Status)#
<input type="submit" name="geon0Load" value="Load" />::
<input type="submit" name="geon0Deactivate" value="de-Activate" />
<input type="submit" name="geon0Remove" value="Remove" />::
<input type="submit" name="geon0Activate" value="Activate" />
<input type="submit" name="geon0Remove" value="Remove" />
#(/geon0Status)#</dd>
#(geon0ActionLoaded)#::
<dt></dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
#(/geon0ActionLoaded)#
#(geon0ActionRemoved)#::
<dt></dt><dd><div class="commit">de-activated and removed dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
#(/geon0ActionRemoved)#
#(geon0ActionDeactivated)#::
<dt></dt><dd><div class="commit">de-activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot de-activate dictionary file: #[error]#</div></dd>
#(/geon0ActionDeactivated)#
#(geon0ActionActivated)#::
<dt></dt><dd><div class="commit">activated dictionary file</div></dd>::
<dt></dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
#(/geon0ActionActivated)#
</dl>
<h4>OpenGeoDB</h4>
<p>With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.</p>
<dl>
<dt><label>Download from</label></dt>

View File

@ -21,7 +21,8 @@
import java.io.IOException;
import java.net.MalformedURLException;
import net.yacy.document.geolocalization.OpenGeoDB;
import net.yacy.document.geolocalization.GeonamesLocalization;
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
@ -58,15 +59,56 @@ public class DictionaryLoader_p {
if (post == null) return prop;
// GEON0
if (post.containsKey("geon0Load")) {
// load from the net
try {
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
prop.put("geon0ActionLoaded", 1);
} catch (MalformedURLException e) {
Log.logException(e);
prop.put("geon0ActionLoaded", 2);
prop.put("geon0ActionLoaded_error", e.getMessage());
} catch (IOException e) {
Log.logException(e);
prop.put("geon0ActionLoaded", 2);
prop.put("geon0ActionLoaded_error", e.getMessage());
}
}
if (post.containsKey("geon0Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionRemoved", 1);
}
if (post.containsKey("geon0Deactivate")) {
LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
prop.put("geon0ActionDeactivated", 1);
}
if (post.containsKey("geon0Activate")) {
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
prop.put("geon0ActionActivated", 1);
}
// GEO1
if (post.containsKey("geo1Load")) {
// load from the net
try {
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEO1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
byte[] b = response.getContent();
FileUtils.copy(b, LibraryProvider.Dictionary.GEO1.file());
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false);
prop.put("geo1Status", LibraryProvider.Dictionary.GEO1.file().exists() ? 1 : 0);
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
prop.put("geo1ActionLoaded", 1);
} catch (MalformedURLException e) {
Log.logException(e);
@ -80,25 +122,24 @@ public class DictionaryLoader_p {
}
if (post.containsKey("geo1Remove")) {
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.fileDisabled());
LibraryProvider.geoDB = new OpenGeoDB(null, true);
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file());
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionRemoved", 1);
}
if (post.containsKey("geo1Deactivate")) {
LibraryProvider.Dictionary.GEO1.file().renameTo(LibraryProvider.Dictionary.GEO1.fileDisabled());
LibraryProvider.geoDB = new OpenGeoDB(null, true);
LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled());
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
prop.put("geo1ActionDeactivated", 1);
}
if (post.containsKey("geo1Activate")) {
LibraryProvider.Dictionary.GEO1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEO1.file());
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false);
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
prop.put("geo1ActionActivated", 1);
}
// check status again
for (LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);

View File

@ -151,7 +151,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
</div>
#{/loc}#
</p>
<p class="urlinfo" style="clear:left;">Geographic information provided by <a href="http://opengeodb.hoppe-media.com">OpenGeoDB</a>, Map provided by <a href="http://www.openstreetmap.org">OpenStreetMap</a></p>
<p class="urlinfo" style="clear:left;">Map (c) by <a href="http://www.openstreetmap.org">OpenStreetMap</a> and contributors, CC-BY-SA</p>
</div>
#(/geoinfo)#

View File

@ -560,7 +560,7 @@ public class yacysearch {
}
// find geographic info
Set<Location> coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true);
Set<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
if (coordinates == null || coordinates.isEmpty() || offset > 0) {
prop.put("geoinfo", "0");
} else {

View File

@ -69,7 +69,7 @@ public class yacysearch_location {
String subject = "";
for (String s: message.getSubject()) subject += " " + s;
words += subject;
for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false));
for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoLoc.find(word, true));
String locnames = "";
for (Location location: locations) locnames += ", " + location.getName();

View File

@ -221,7 +221,7 @@ public class DidYouMean {
public void test(final String s) throws InterruptedException {
Set<String> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoDB.recommend(s));
libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) createGen = false;
for (final String t: libr) {
guessLib.put(t);

View File

@ -1,28 +1,24 @@
// LibraryProvider.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 01.10.2009 on http://yacy.net
//
// This is a part of YaCy
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* LibraryProvider.java
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 01.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package de.anomic.data;
@ -39,7 +35,9 @@ import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import net.yacy.document.geolocalization.OpenGeoDB;
import net.yacy.document.geolocalization.GeonamesLocalization;
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
import net.yacy.document.geolocalization.OverarchingLocalization;
import net.yacy.kelondro.logging.Log;
public class LibraryProvider {
@ -50,17 +48,20 @@ public class LibraryProvider {
public static final String disabledExtension = ".disabled";
public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null);
public static OpenGeoDB geoDB = new OpenGeoDB(null, true);
public static OverarchingLocalization geoLoc = new OverarchingLocalization();
private static File dictSource = null;
private static File dictRoot = null;
public static enum Dictionary {
GEO0("geo0",
GEODB0("geo0",
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz",
"opengeodb-0.2.5a-UTF8-sql.gz"),
GEO1("geo1",
GEODB1("geo1",
"http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz",
"opengeodb-02621_2010-03-16.sql.gz");
"opengeodb-02621_2010-03-16.sql.gz"),
GEON0("geon0",
"http://download.geonames.org/export/dump/cities1000.zip",
"cities1000.zip");
public String nickname, url, filename;
private Dictionary(String nickname, String url, String filename) {
@ -95,18 +96,27 @@ public class LibraryProvider {
integrateDeReWo();
initDidYouMean();
integrateOpenGeoDB();
integrateGeonames();
}
public static void integrateOpenGeoDB() {
File geo1 = Dictionary.GEO1.file();
File geo0 = Dictionary.GEO0.file();
File geo1 = Dictionary.GEODB1.file();
File geo0 = Dictionary.GEODB0.file();
if (geo1.exists()) {
if (geo0.exists()) geo0.renameTo(Dictionary.GEO0.fileDisabled());
geoDB = new OpenGeoDB(geo1, false);
if (geo0.exists()) geo0.renameTo(Dictionary.GEODB0.fileDisabled());
geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(geo1, false));
return;
}
if (geo0.exists()) {
geoDB = new OpenGeoDB(geo0, true);
geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocalization(geo0, false));
return;
}
}
/**
 * Registers the GeoNames dictionary with the shared {@code geoLoc} service
 * under the GEON0 nickname, provided the dictionary file is present on disk.
 * Does nothing when the file is missing.
 */
public static void integrateGeonames() {
    final File geonamesDump = Dictionary.GEON0.file();
    if (!geonamesDump.exists()) {
        // nothing downloaded (or the dictionary is de-activated): leave geoLoc untouched
        return;
    }
    geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocalization(geonamesDump));
}

View File

@ -53,7 +53,6 @@ import java.util.Iterator;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.table.Table;

View File

@ -1,28 +1,24 @@
// Coordinates.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.10.2009 on http://yacy.net
//
// This is a part of YaCy
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* Coordinates.java
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 04.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;

View File

@ -0,0 +1,169 @@
/**
* GeonamesLocalization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.Collator;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import net.yacy.kelondro.logging.Log;
/**
 * Localization service backed by the GeoNames "cities1000" dump
 * (http://download.geonames.org/export/dump/cities1000.zip).
 * All records are loaded into memory by the constructor; lookups never touch the file again.
 */
public class GeonamesLocalization implements Localization {

    /*
        The main 'geoname' table has the following fields :
        ---------------------------------------------------
        geonameid         : integer id of record in geonames database
        name              : name of geographical point (utf8) varchar(200)
        asciiname         : name of geographical point in plain ascii characters, varchar(200)
        alternatenames    : alternatenames, comma separated varchar(5000)
        latitude          : latitude in decimal degrees (wgs84)
        longitude         : longitude in decimal degrees (wgs84)
        feature class     : see http://www.geonames.org/export/codes.html, char(1)
        feature code      : see http://www.geonames.org/export/codes.html, varchar(10)
        country code      : ISO-3166 2-letter country code, 2 characters
        cc2               : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
        admin1 code       : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
        admin2 code       : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
        admin3 code       : code for third level administrative division, varchar(20)
        admin4 code       : code for fourth level administrative division, varchar(20)
        population        : bigint (8 byte int)
        elevation         : in meters, integer
        gtopo30           : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
        timezone          : the timezone id (see file timeZone.txt)
        modification date : date of last modification in yyyy-MM-dd format
    */

    /** name of the data file inside the downloaded zip archive */
    private static final String ZIP_ENTRY_NAME = "cities1000.txt";

    /** number of leading columns (id, name, asciiname, alternatenames, latitude, longitude) a record must have */
    private static final int MIN_FIELDS = 6;

    // use a collator to relax when distinguishing between lowercase and uppercase letters
    private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
    static {
        insensitiveCollator.setStrength(Collator.SECONDARY);
        insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
    }

    private final HashMap<Integer, Location> id2loc;
    private final TreeMap<String, List<Integer>> name2ids;
    private final File file;

    /**
     * Load the cities1000.zip file from http://download.geonames.org/export/dump/
     * @param file the zip archive; if it is null or does not exist, the service stays empty
     */
    public GeonamesLocalization(final File file) {
        this.file = file;
        this.id2loc = new HashMap<Integer, Location>();
        this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);

        if (file == null || !file.exists()) return;

        ZipFile zf = null;
        try {
            zf = new ZipFile(file);
            final ZipEntry ze = zf.getEntry(ZIP_ENTRY_NAME);
            if (ze == null) {
                // wrong or damaged archive: report it instead of failing with a NullPointerException
                Log.logException(new IOException("entry '" + ZIP_ENTRY_NAME + "' not found in " + file.toString()));
                return;
            }
            final BufferedReader reader = new BufferedReader(new InputStreamReader(zf.getInputStream(ze), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                addRecord(line);
            }
        } catch (IOException e) {
            // when a read error occurs, just accept it and work on with what was loaded so far
            Log.logException(e);
        } finally {
            // closing the zip file also closes every stream obtained from it
            if (zf != null) {
                try {
                    zf.close();
                } catch (IOException e) {
                    Log.logException(e);
                }
            }
        }
    }

    /**
     * Parse one tab-separated record and add it to the in-memory indexes.
     * Empty, truncated or non-numeric records are skipped so that a single bad
     * line cannot abort the whole import.
     * @param line one line of cities1000.txt
     */
    private void addRecord(final String line) {
        if (line.length() == 0) return;
        final String[] fields = line.split("\t");
        if (fields.length < MIN_FIELDS) return;

        int id;
        Location c;
        try {
            id = Integer.parseInt(fields[0]);
            // column 4 is latitude and column 5 is longitude; column 5 is passed first
            // NOTE(review): presumably Location expects (lon, lat, name) - confirm against the Location constructor
            c = new Location(Double.parseDouble(fields[5]), Double.parseDouble(fields[4]), fields[1]);
        } catch (NumberFormatException e) {
            return;
        }
        this.id2loc.put(id, c);

        // collect all distinct names of this place: name, ascii name and alternate names
        final Set<String> locnames = new HashSet<String>();
        locnames.add(fields[1]);
        locnames.add(fields[2]);
        for (String s: fields[3].split(",")) locnames.add(s);

        for (String name: locnames) {
            // an empty alternate-names column would otherwise register "" as a name for the place
            if (name.length() == 0) continue;
            List<Integer> locs = this.name2ids.get(name);
            if (locs == null) {
                locs = new ArrayList<Integer>(1);
                this.name2ids.put(name, locs);
            }
            locs.add(id);
        }
    }

    /**
     * find a location by name
     * @param anyname a name of a location
     * @param locationexact if true, only names that the (case-insensitive) index considers equal are matched;
     *        if false, all names starting with anyname (ignoring case) are matched
     * @return a set of locations; empty if nothing matches or anyname is null
     */
    public Set<Location> find(String anyname, boolean locationexact) {
        final HashSet<Location> a = new HashSet<Location>();
        if (anyname == null) return a;

        final HashSet<Integer> r = new HashSet<Integer>();
        if (locationexact) {
            final List<Integer> c = this.name2ids.get(anyname);
            if (c != null) r.addAll(c);
        } else {
            final String prefix = anyname.toLowerCase();
            final SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
            for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
                // the keys are sorted, so the first non-matching key ends the prefix range
                if (e.getKey().toLowerCase().startsWith(prefix)) r.addAll(e.getValue()); else break;
            }
        }

        for (Integer e: r) {
            final Location w = this.id2loc.get(e);
            if (w != null) a.add(w);
        }
        return a;
    }

    /**
     * recommend a set of names according to a given name
     * @param s a possibly partially matching name; leading/trailing whitespace and case are ignored
     * @return the known names that start with s; empty if s is null
     */
    public Set<String> recommend(String s) {
        final Set<String> a = new HashSet<String>();
        if (s == null) return a;
        final String prefix = s.trim().toLowerCase();
        final SortedMap<String, List<Integer>> t = this.name2ids.tailMap(prefix);
        for (String r: t.keySet()) {
            // keys are stored in their original case, so they must be lowercased as well;
            // comparing them unchanged against the lowercased prefix never matched capitalised names
            if (r.toLowerCase().startsWith(prefix)) a.add(r); else break;
        }
        return a;
    }

    /**
     * @return the name of the underlying file, or the simple class name if no file was given
     */
    public String nickname() {
        // the constructor accepts a null file, so this must not dereference it blindly
        if (this.file == null) return getClass().getSimpleName();
        return this.file.getName();
    }

    /**
     * @return the hash code of the nickname, consistent with {@link #equals(Object)}
     */
    @Override
    public int hashCode() {
        return this.nickname().hashCode();
    }

    /**
     * @return true if other is a localization service with the same nickname
     */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof Localization)) return false;
        return this.nickname().equals(((Localization) other).nickname());
    }
}

View File

@ -0,0 +1,68 @@
/**
* Localization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.util.Set;
/**
 * Contract for a localization service: a dictionary that maps place names to
 * locations and can suggest place names for partial input.
 * @author Michael Peter Christen
 */
public interface Localization {

    /**
     * Look up locations by name.
     * @param anyname the name of a location
     * @param locationexact if true, only exact matches of the name are returned;
     *        if false, partially matching names are returned as well
     * @return a set of locations
     */
    Set<Location> find(String anyname, boolean locationexact);

    /**
     * Suggest names for a given (possibly incomplete) name.
     * @param s a possibly partially matching name
     * @return the names from the local dictionary of names that match the given name
     */
    Set<String> recommend(String s);

    /**
     * Nickname of this localization service.
     * @return the nickname
     */
    String nickname();

    /**
     * Hash code used to distinguish localization services in hash sets.
     * @return the hash code, may be derived from the nickname
     */
    int hashCode();

    /**
     * Compare localization services; to be used for hash sets with localization services.
     * @param other the object to compare with
     * @return true if both objects are localization services and have the same nickname
     */
    boolean equals(Object other);
}

View File

@ -1,28 +1,24 @@
// Coordinates.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 08.10.2009 on http://yacy.net
//
// This is a part of YaCy
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* Location.java
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 08.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
@ -49,4 +45,9 @@ public class Location extends Coordinates {
return this.name;
}
/**
 * Two locations are equal if the superclass considers them equal and they have the same name.
 * @param loc the object to compare with
 * @return true if loc is a Location that is equal according to the superclass and has an equal name
 */
public boolean equals(Object loc) {
    if (!(loc instanceof Location)) return false;
    final Location other = (Location) loc;
    if (!super.equals(loc)) return false;
    // compare name with name; the previous code compared the name String with the
    // Location object itself, which can never be equal
    return this.name == null ? other.name == null : this.name.equals(other.name);
}
}

View File

@ -1,28 +1,24 @@
// OpenGeoDB.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.10.2009 on http://yacy.net
//
// This is a part of YaCy
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* OpenGeoDBLocalization
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 04.10.2009 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
@ -59,7 +55,7 @@ import net.yacy.kelondro.logging.Log;
* This class will provide a super-fast access to the OpenGeoDB,
* since all request are evaluated using data in the RAM.
*/
public class OpenGeoDB {
public class OpenGeoDBLocalization implements Localization {
// use a collator to relax when distinguishing between lowercase and uppercase letters
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
@ -71,17 +67,19 @@ public class OpenGeoDB {
private final HashMap<Integer, String> locTypeHash2locType;
private final HashMap<Integer, Location> id2loc;
private final HashMap<Integer, Integer> id2locTypeHash;
private final TreeMap<String, List<Integer>> locationName2ids;
private final TreeMap<String, List<Integer>> name2ids;
private final TreeMap<String, List<Integer>> kfz2ids;
private final HashMap<String, List<Integer>> predial2ids;
private final HashMap<String, Integer> zip2id;
private final File file;
public OpenGeoDB(final File file, boolean lonlat) {
public OpenGeoDBLocalization(final File file, boolean lonlat) {
this.file = file;
this.locTypeHash2locType = new HashMap<Integer, String>();
this.id2loc = new HashMap<Integer, Location>();
this.id2locTypeHash = new HashMap<Integer, Integer>();
this.locationName2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
this.kfz2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
this.predial2ids = new HashMap<String, List<Integer>>();
this.zip2id = new HashMap<String, Integer>();
@ -123,10 +121,10 @@ public class OpenGeoDB {
if (v[1].equals("500100000")) { // Ortsname
id = Integer.parseInt(v[0]);
h = removeQuotes(v[2]);
List<Integer> l = this.locationName2ids.get(h);
List<Integer> l = this.name2ids.get(h);
if (l == null) l = new ArrayList<Integer>(1);
l.add(id);
this.locationName2ids.put(h, l);
this.name2ids.put(h, l);
Location loc = this.id2loc.get(id);
if (loc != null) loc.setName(h);
} else if (v[1].equals("500400000")) { // Vorwahl
@ -181,22 +179,20 @@ public class OpenGeoDB {
* @param anyname
* @return
*/
public HashSet<Location> find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) {
public HashSet<Location> find(String anyname, boolean locationexact) {
HashSet<Integer> r = new HashSet<Integer>();
List<Integer> c;
if (location) {
if (locationexact) {
c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c);
c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
} else {
SortedMap<String, List<Integer>> cities = this.locationName2ids.tailMap(anyname);
SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
}
c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);
c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);
Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);
}
}
if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);}
if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);}
if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);}
HashSet<Location> a = new HashSet<Location>();
for (Integer e: r) {
Location w = this.id2loc.get(e);
@ -213,10 +209,23 @@ public class OpenGeoDB {
public Set<String> recommend(String s) {
Set<String> a = new HashSet<String>();
s = s.trim().toLowerCase();
SortedMap<String, List<Integer>> t = this.locationName2ids.tailMap(s);
SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
for (String r: t.keySet()) {
if (r.startsWith(s)) a.add(r); else break;
}
return a;
}
/**
 * @return the name of the underlying file, or the simple class name if no file was given
 */
public String nickname() {
    // guard against a null file so that hashCode() and equals() cannot fail with a NullPointerException
    if (this.file == null) return getClass().getSimpleName();
    return this.file.getName();
}
/**
 * @return the hash code of this service's nickname
 */
public int hashCode() {
    final String id = this.nickname();
    return id.hashCode();
}
/**
 * @param other the object to compare with
 * @return true if other is a localization service with the same nickname
 */
public boolean equals(Object other) {
    if (other instanceof Localization) {
        final Localization that = (Localization) other;
        return this.nickname().equals(that.nickname());
    }
    return false;
}
}

View File

@ -0,0 +1,93 @@
/**
* OverarchingLocalization.java
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* first published 16.05.2010 on http://yacy.net
*
* This file is part of YaCy Content Integration
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file COPYING.LESSER.
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document.geolocalization;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * A localization service that aggregates any number of other localization
 * services and answers every request with the union of their results.
 */
public class OverarchingLocalization implements Localization {

    // Registered services by nickname. A concurrent map is used because the instance is
    // published as a shared static (LibraryProvider.geoLoc): services are added and removed
    // at runtime while lookups iterate over the values, which would raise a
    // ConcurrentModificationException with a plain HashMap.
    private final Map<String, Localization> services;

    /**
     * create a new overarching localization object
     */
    public OverarchingLocalization() {
        this.services = new java.util.concurrent.ConcurrentHashMap<String, Localization>();
    }

    /**
     * add a localization service; a service registered earlier under the same nickname is replaced
     * @param nickname the nickname of the service, must not be null
     * @param service the service, must not be null
     * @throws IllegalArgumentException if nickname or service is null
     */
    public void addLocalization(String nickname, Localization service) {
        if (nickname == null || service == null) {
            // a null service would only fail later, inside find() or recommend()
            throw new IllegalArgumentException("nickname and service must not be null");
        }
        this.services.put(nickname, service);
    }

    /**
     * remove a localization service; does nothing if no service is registered under the nickname
     * @param nickname the nickname the service was registered with
     */
    public void removeLocalization(String nickname) {
        if (nickname == null) return;
        this.services.remove(nickname);
    }

    /**
     * find (a set of) locations
     * @param anyname a name of a location
     * @param locationexact passed unchanged to every registered service
     * @return the union of the locations found by all registered services
     */
    public Set<Location> find(String anyname, boolean locationexact) {
        final Set<Location> locations = new HashSet<Location>();
        for (Localization service: this.services.values()) {
            final Set<Location> found = service.find(anyname, locationexact);
            if (found != null) locations.addAll(found);
        }
        return locations;
    }

    /**
     * recommend location names
     * @param s a possibly partially matching name
     * @return the union of the recommendations of all registered services
     */
    public Set<String> recommend(String s) {
        final Set<String> recommendations = new HashSet<String>();
        for (Localization service: this.services.values()) {
            final Set<String> names = service.recommend(s);
            if (names != null) recommendations.addAll(names);
        }
        return recommendations;
    }

    /**
     * @return the fixed nickname "oa"
     */
    public String nickname() {
        return "oa";
    }

    /**
     * @return the hash code of the nickname
     */
    @Override
    public int hashCode() {
        return this.nickname().hashCode();
    }

    /**
     * @return true if other is a localization service with the same nickname
     */
    @Override
    public boolean equals(Object other) {
        if (!(other instanceof Localization)) return false;
        return this.nickname().equals(((Localization) other).nickname());
    }
}