mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added another geolocalization data source: GeoNames
- added downloader option in DictionaryLoader - added generalization (interfaces and overarching localization) - more abstraction using the libraries git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6879 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
c9862e0ca9
commit
e43e61e502
|
@ -1,4 +1,4 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
1<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Dictionary Loader</title>
|
||||
|
@ -18,8 +18,46 @@
|
|||
<form action="DictionaryLoader_p.html" method="post" enctype="multipart/form-data">
|
||||
<fieldset>
|
||||
<legend>Geolocalization</legend>
|
||||
The geolocalization file will enable YaCy to present locations from OpenStreetMap according to given search words.
|
||||
With this file it is possible to find locations using the location (city) name, a zip code, a car sign or a telephone pre-dial number.
|
||||
Geolocalization will enable YaCy to present locations from OpenStreetMap according to given search words.
|
||||
|
||||
<h4>GeoNames</h4>
|
||||
<p>With this file it is possible to find cities with a population > 1000 all over the world.</p>
|
||||
|
||||
<dl>
|
||||
<dt><label>Download from</label></dt>
|
||||
<dd>#[geon0URL]#</dd>
|
||||
<dt><label>Storage location</label></dt>
|
||||
<dd>#[geon0Storage]#</dd>
|
||||
<dt><label>Status</label></dt>
|
||||
<dd>#(geon0Status)#<div class="info">not loaded</div>::<div class="commit">loaded</div>::de-activated#(/geon0Status)#</dd>
|
||||
<dt></dt>
|
||||
<dd>#(geon0Status)#
|
||||
<input type="submit" name="geon0Load" value="Load" />::
|
||||
<input type="submit" name="geon0Deactivate" value="de-Activate" />
|
||||
<input type="submit" name="geon0Remove" value="Remove" />::
|
||||
<input type="submit" name="geon0Activate" value="Activate" />
|
||||
<input type="submit" name="geon0Remove" value="Remove" />
|
||||
#(/geon0Status)#</dd>
|
||||
#(geon0ActionLoaded)#::
|
||||
<dt></dt><dd><div class="commit">loaded and activated dictionary file</div></dd>::
|
||||
<dt></dt><dd><div class="error">loading of dictionary file failed: #[error]#</div></dd>
|
||||
#(/geon0ActionLoaded)#
|
||||
#(geon0ActionRemoved)#::
|
||||
<dt></dt><dd><div class="commit">de-activated and removed dictionary file</div></dd>::
|
||||
<dt></dt><dd><div class="error">cannot remove dictionary file: #[error]#</div></dd>
|
||||
#(/geon0ActionRemoved)#
|
||||
#(geon0ActionDeactivated)#::
|
||||
<dt></dt><dd><div class="commit">de-activated dictionary file</div></dd>::
|
||||
<dt></dt><dd><div class="error">cannot de-activate dictionary file: #[error]#</div></dd>
|
||||
#(/geon0ActionDeactivated)#
|
||||
#(geon0ActionActivated)#::
|
||||
<dt></dt><dd><div class="commit">activated dictionary file</div></dd>::
|
||||
<dt></dt><dd><div class="error">cannot activate dictionary file: #[error]#</div></dd>
|
||||
#(/geon0ActionActivated)#
|
||||
</dl>
|
||||
|
||||
<h4>OpenGeoDB</h4>
|
||||
<p>With this file it is possible to find locations in Germany using the location (city) name, a zip code, a car sign or a telephone pre-dial number.</p>
|
||||
|
||||
<dl>
|
||||
<dt><label>Download from</label></dt>
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import net.yacy.document.geolocalization.OpenGeoDB;
|
||||
import net.yacy.document.geolocalization.GeonamesLocalization;
|
||||
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
|
@ -58,15 +59,56 @@ public class DictionaryLoader_p {
|
|||
|
||||
if (post == null) return prop;
|
||||
|
||||
// GEON0
|
||||
if (post.containsKey("geon0Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
|
||||
byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
|
||||
prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0);
|
||||
prop.put("geon0ActionLoaded", 1);
|
||||
} catch (MalformedURLException e) {
|
||||
Log.logException(e);
|
||||
prop.put("geon0ActionLoaded", 2);
|
||||
prop.put("geon0ActionLoaded_error", e.getMessage());
|
||||
} catch (IOException e) {
|
||||
Log.logException(e);
|
||||
prop.put("geon0ActionLoaded", 2);
|
||||
prop.put("geon0ActionLoaded_error", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
if (post.containsKey("geon0Remove")) {
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file());
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
|
||||
prop.put("geon0ActionRemoved", 1);
|
||||
}
|
||||
|
||||
if (post.containsKey("geon0Deactivate")) {
|
||||
LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname);
|
||||
prop.put("geon0ActionDeactivated", 1);
|
||||
}
|
||||
|
||||
if (post.containsKey("geon0Activate")) {
|
||||
LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
|
||||
prop.put("geon0ActionActivated", 1);
|
||||
}
|
||||
|
||||
// GEO1
|
||||
if (post.containsKey("geo1Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEO1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
|
||||
Response response = sb.loader.load(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
|
||||
byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEO1.file());
|
||||
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false);
|
||||
prop.put("geo1Status", LibraryProvider.Dictionary.GEO1.file().exists() ? 1 : 0);
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
|
||||
prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 1 : 0);
|
||||
prop.put("geo1ActionLoaded", 1);
|
||||
} catch (MalformedURLException e) {
|
||||
Log.logException(e);
|
||||
|
@ -80,25 +122,24 @@ public class DictionaryLoader_p {
|
|||
}
|
||||
|
||||
if (post.containsKey("geo1Remove")) {
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.file());
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEO1.fileDisabled());
|
||||
LibraryProvider.geoDB = new OpenGeoDB(null, true);
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file());
|
||||
FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
|
||||
prop.put("geo1ActionRemoved", 1);
|
||||
}
|
||||
|
||||
if (post.containsKey("geo1Deactivate")) {
|
||||
LibraryProvider.Dictionary.GEO1.file().renameTo(LibraryProvider.Dictionary.GEO1.fileDisabled());
|
||||
LibraryProvider.geoDB = new OpenGeoDB(null, true);
|
||||
LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled());
|
||||
LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
|
||||
prop.put("geo1ActionDeactivated", 1);
|
||||
}
|
||||
|
||||
if (post.containsKey("geo1Activate")) {
|
||||
LibraryProvider.Dictionary.GEO1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEO1.file());
|
||||
LibraryProvider.geoDB = new OpenGeoDB(LibraryProvider.Dictionary.GEO1.file(), false);
|
||||
LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file());
|
||||
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(LibraryProvider.Dictionary.GEODB1.file(), false));
|
||||
prop.put("geo1ActionActivated", 1);
|
||||
}
|
||||
|
||||
|
||||
// check status again
|
||||
for (LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) {
|
||||
prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0);
|
||||
|
|
|
@ -151,7 +151,7 @@ var progressbar = new Progressbar(#[results]#, document.getElementById("results"
|
|||
</div>
|
||||
#{/loc}#
|
||||
</p>
|
||||
<p class="urlinfo" style="clear:left;">Geographic information provided by <a href="http://opengeodb.hoppe-media.com">OpenGeoDB</a>, Map provided by <a href="http://www.openstreetmap.org">OpenStreetMap</a></p>
|
||||
<p class="urlinfo" style="clear:left;">Map (c) by <a href="http://www.openstreetmap.org">OpenStreetMap</a> and contributors, CC-BY-SA</p>
|
||||
</div>
|
||||
#(/geoinfo)#
|
||||
|
||||
|
|
|
@ -560,7 +560,7 @@ public class yacysearch {
|
|||
}
|
||||
|
||||
// find geographic info
|
||||
Set<Location> coordinates = LibraryProvider.geoDB.find(originalquerystring, true, false, true, true, true);
|
||||
Set<Location> coordinates = LibraryProvider.geoLoc.find(originalquerystring, false);
|
||||
if (coordinates == null || coordinates.isEmpty() || offset > 0) {
|
||||
prop.put("geoinfo", "0");
|
||||
} else {
|
||||
|
|
|
@ -69,7 +69,7 @@ public class yacysearch_location {
|
|||
String subject = "";
|
||||
for (String s: message.getSubject()) subject += " " + s;
|
||||
words += subject;
|
||||
for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoDB.find(word, true, true, false, false, false));
|
||||
for (String word: words.split(" ")) if (word.length() >= 3) locations.addAll(LibraryProvider.geoLoc.find(word, true));
|
||||
|
||||
String locnames = "";
|
||||
for (Location location: locations) locnames += ", " + location.getName();
|
||||
|
|
|
@ -221,7 +221,7 @@ public class DidYouMean {
|
|||
|
||||
public void test(final String s) throws InterruptedException {
|
||||
Set<String> libr = LibraryProvider.dymLib.recommend(s);
|
||||
libr.addAll(LibraryProvider.geoDB.recommend(s));
|
||||
libr.addAll(LibraryProvider.geoLoc.recommend(s));
|
||||
if (!libr.isEmpty()) createGen = false;
|
||||
for (final String t: libr) {
|
||||
guessLib.put(t);
|
||||
|
|
|
@ -1,28 +1,24 @@
|
|||
// LibraryProvider.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 01.10.2009 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
/**
|
||||
* LibraryProvider.java
|
||||
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 01.10.2009 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package de.anomic.data;
|
||||
|
||||
|
@ -39,7 +35,9 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import net.yacy.document.geolocalization.OpenGeoDB;
|
||||
import net.yacy.document.geolocalization.GeonamesLocalization;
|
||||
import net.yacy.document.geolocalization.OpenGeoDBLocalization;
|
||||
import net.yacy.document.geolocalization.OverarchingLocalization;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
|
||||
public class LibraryProvider {
|
||||
|
@ -50,17 +48,20 @@ public class LibraryProvider {
|
|||
public static final String disabledExtension = ".disabled";
|
||||
|
||||
public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null);
|
||||
public static OpenGeoDB geoDB = new OpenGeoDB(null, true);
|
||||
public static OverarchingLocalization geoLoc = new OverarchingLocalization();
|
||||
private static File dictSource = null;
|
||||
private static File dictRoot = null;
|
||||
|
||||
public static enum Dictionary {
|
||||
GEO0("geo0",
|
||||
GEODB0("geo0",
|
||||
"http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz",
|
||||
"opengeodb-0.2.5a-UTF8-sql.gz"),
|
||||
GEO1("geo1",
|
||||
GEODB1("geo1",
|
||||
"http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02621_2010-03-16.sql.gz",
|
||||
"opengeodb-02621_2010-03-16.sql.gz");
|
||||
"opengeodb-02621_2010-03-16.sql.gz"),
|
||||
GEON0("geon0",
|
||||
"http://download.geonames.org/export/dump/cities1000.zip",
|
||||
"cities1000.zip");
|
||||
|
||||
public String nickname, url, filename;
|
||||
private Dictionary(String nickname, String url, String filename) {
|
||||
|
@ -95,18 +96,27 @@ public class LibraryProvider {
|
|||
integrateDeReWo();
|
||||
initDidYouMean();
|
||||
integrateOpenGeoDB();
|
||||
integrateGeonames();
|
||||
}
|
||||
|
||||
public static void integrateOpenGeoDB() {
|
||||
File geo1 = Dictionary.GEO1.file();
|
||||
File geo0 = Dictionary.GEO0.file();
|
||||
File geo1 = Dictionary.GEODB1.file();
|
||||
File geo0 = Dictionary.GEODB0.file();
|
||||
if (geo1.exists()) {
|
||||
if (geo0.exists()) geo0.renameTo(Dictionary.GEO0.fileDisabled());
|
||||
geoDB = new OpenGeoDB(geo1, false);
|
||||
if (geo0.exists()) geo0.renameTo(Dictionary.GEODB0.fileDisabled());
|
||||
geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocalization(geo1, false));
|
||||
return;
|
||||
}
|
||||
if (geo0.exists()) {
|
||||
geoDB = new OpenGeoDB(geo0, true);
|
||||
geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocalization(geo0, false));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public static void integrateGeonames() {
|
||||
File geon = Dictionary.GEON0.file();
|
||||
if (geon.exists()) {
|
||||
geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocalization(geon));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,7 +53,6 @@ import java.util.Iterator;
|
|||
import net.yacy.kelondro.index.Column;
|
||||
import net.yacy.kelondro.index.Row;
|
||||
import net.yacy.kelondro.index.RowSpaceExceededException;
|
||||
import net.yacy.kelondro.index.Row.Entry;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.order.NaturalOrder;
|
||||
import net.yacy.kelondro.table.Table;
|
||||
|
|
|
@ -1,28 +1,24 @@
|
|||
// Coordinates.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 04.10.2009 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
/**
|
||||
* Coordinates.java
|
||||
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 04.10.2009 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
|
|
|
@ -0,0 +1,169 @@
|
|||
/**
|
||||
* GeonamesLocalization.java
|
||||
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 16.05.2010 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
|
||||
public class GeonamesLocalization implements Localization {
|
||||
|
||||
/*
|
||||
The main 'geoname' table has the following fields :
|
||||
---------------------------------------------------
|
||||
geonameid : integer id of record in geonames database
|
||||
name : name of geographical point (utf8) varchar(200)
|
||||
asciiname : name of geographical point in plain ascii characters, varchar(200)
|
||||
alternatenames : alternatenames, comma separated varchar(5000)
|
||||
latitude : latitude in decimal degrees (wgs84)
|
||||
longitude : longitude in decimal degrees (wgs84)
|
||||
feature class : see http://www.geonames.org/export/codes.html, char(1)
|
||||
feature code : see http://www.geonames.org/export/codes.html, varchar(10)
|
||||
country code : ISO-3166 2-letter country code, 2 characters
|
||||
cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
|
||||
admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
|
||||
admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
|
||||
admin3 code : code for third level administrative division, varchar(20)
|
||||
admin4 code : code for fourth level administrative division, varchar(20)
|
||||
population : bigint (8 byte int)
|
||||
elevation : in meters, integer
|
||||
gtopo30 : average elevation of 30'x30' (ca 900mx900m) area in meters, integer
|
||||
timezone : the timezone id (see file timeZone.txt)
|
||||
modification date : date of last modification in yyyy-MM-dd format
|
||||
*/
|
||||
|
||||
// use a collator to relax when distinguishing between lowercase und uppercase letters
|
||||
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
|
||||
static {
|
||||
insensitiveCollator.setStrength(Collator.SECONDARY);
|
||||
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||
}
|
||||
|
||||
private final HashMap<Integer, Location> id2loc;
|
||||
private final TreeMap<String, List<Integer>> name2ids;
|
||||
private final File file;
|
||||
|
||||
public GeonamesLocalization(final File file) {
|
||||
// this is a processing of the cities1000.zip file from http://download.geonames.org/export/dump/
|
||||
|
||||
this.file = file;
|
||||
this.id2loc = new HashMap<Integer, Location>();
|
||||
this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
|
||||
|
||||
if (file == null || !file.exists()) return;
|
||||
BufferedReader reader;
|
||||
try {
|
||||
ZipFile zf = new ZipFile(file);
|
||||
ZipEntry ze = zf.getEntry("cities1000.txt");
|
||||
InputStream is = zf.getInputStream(ze);
|
||||
reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
|
||||
} catch (IOException e) {
|
||||
Log.logException(e);
|
||||
return;
|
||||
}
|
||||
|
||||
// when an error occurs after this line, just accept it and work on
|
||||
try {
|
||||
String line;
|
||||
String[] fields;
|
||||
Set<String> locnames;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (line.length() == 0) continue;
|
||||
fields = line.split("\t");
|
||||
int id = Integer.parseInt(fields[0]);
|
||||
locnames = new HashSet<String>();
|
||||
locnames.add(fields[1]);
|
||||
locnames.add(fields[2]);
|
||||
for (String s: fields[3].split(",")) locnames.add(s);
|
||||
Location c = new Location(Double.parseDouble(fields[5]), Double.parseDouble(fields[4]), fields[1]);
|
||||
this.id2loc.put(id, c);
|
||||
for (String name: locnames) {
|
||||
List<Integer> locs = this.name2ids.get(name);
|
||||
if (locs == null) locs = new ArrayList<Integer>(1);
|
||||
locs.add(id);
|
||||
this.name2ids.put(name, locs);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public Set<Location> find(String anyname, boolean locationexact) {
|
||||
HashSet<Integer> r = new HashSet<Integer>();
|
||||
List<Integer> c;
|
||||
if (locationexact) {
|
||||
c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
|
||||
} else {
|
||||
SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
|
||||
for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
|
||||
if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
|
||||
}
|
||||
}
|
||||
HashSet<Location> a = new HashSet<Location>();
|
||||
for (Integer e: r) {
|
||||
Location w = this.id2loc.get(e);
|
||||
if (w != null) a.add(w);
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
public Set<String> recommend(String s) {
|
||||
Set<String> a = new HashSet<String>();
|
||||
s = s.trim().toLowerCase();
|
||||
SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
|
||||
for (String r: t.keySet()) {
|
||||
if (r.startsWith(s)) a.add(r); else break;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
public String nickname() {
|
||||
return this.file.getName();
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return this.nickname().hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof Localization)) return false;
|
||||
return this.nickname().equals(((Localization) other).nickname());
|
||||
}
|
||||
}
|
68
source/net/yacy/document/geolocalization/Localization.java
Normal file
68
source/net/yacy/document/geolocalization/Localization.java
Normal file
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Localization.java
|
||||
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 16.05.2010 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* localization interface
|
||||
* @author Michael Peter Christen
|
||||
*
|
||||
*/
|
||||
public interface Localization {
|
||||
|
||||
/**
|
||||
* find a location by name
|
||||
* @param anyname - a name of a location
|
||||
* @param locationexact - if true, then only exact matched with the location are returned. if false also partially matching names
|
||||
* @return a set of locations
|
||||
*/
|
||||
public Set<Location> find(String anyname, boolean locationexact);
|
||||
|
||||
/**
|
||||
* recommend a set of names according to a given name
|
||||
* @param s a possibly partially matching name
|
||||
* @return a set of names that match with the given name using the local dictionary of names
|
||||
*/
|
||||
public Set<String> recommend(String s);
|
||||
|
||||
/**
|
||||
* return an nickname of the localization service
|
||||
* @return the nickname
|
||||
*/
|
||||
public String nickname();
|
||||
|
||||
/**
|
||||
* hashCode that must be used to distinuguish localization services in hash sets
|
||||
* @return the hash code, may be derived from the nickname
|
||||
*/
|
||||
public int hashCode();
|
||||
|
||||
/**
|
||||
* compare localization services; to be used for hash sets with localization services
|
||||
* @param other
|
||||
* @return true if both objects are localization services and have the same nickname
|
||||
*/
|
||||
public boolean equals(Object other);
|
||||
}
|
|
@ -1,28 +1,24 @@
|
|||
// Coordinates.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 08.10.2009 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
/**
|
||||
* Location.java
|
||||
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 08.10.2009 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
|
@ -49,4 +45,9 @@ public class Location extends Coordinates {
|
|||
return this.name;
|
||||
}
|
||||
|
||||
public boolean equals(Object loc) {
|
||||
if (!(loc instanceof Location)) return false;
|
||||
return super.equals(loc) && this.name.equals((Location) loc);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,28 +1,24 @@
|
|||
// OpenGeoDB.java
|
||||
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 04.10.2009 on http://yacy.net
|
||||
//
|
||||
// This is a part of YaCy
|
||||
//
|
||||
// $LastChangedDate$
|
||||
// $LastChangedRevision$
|
||||
// $LastChangedBy$
|
||||
//
|
||||
// LICENSE
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation; either version 2 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, write to the Free Software
|
||||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
/**
|
||||
* OpenGeoDBLocalization
|
||||
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 04.10.2009 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
|
@ -59,9 +55,9 @@ import net.yacy.kelondro.logging.Log;
|
|||
* This class will provide a super-fast access to the OpenGeoDB,
|
||||
* since all requests are evaluated using data in the RAM.
|
||||
*/
|
||||
public class OpenGeoDB {
|
||||
public class OpenGeoDBLocalization implements Localization {
|
||||
|
||||
// use a collator to relax when distinguishing between lowercase and uppercase letters
|
||||
// use a collator to relax when distinguishing between lowercase and uppercase letters
|
||||
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
|
||||
static {
|
||||
insensitiveCollator.setStrength(Collator.SECONDARY);
|
||||
|
@ -71,17 +67,19 @@ public class OpenGeoDB {
|
|||
private final HashMap<Integer, String> locTypeHash2locType;
|
||||
private final HashMap<Integer, Location> id2loc;
|
||||
private final HashMap<Integer, Integer> id2locTypeHash;
|
||||
private final TreeMap<String, List<Integer>> locationName2ids;
|
||||
private final TreeMap<String, List<Integer>> name2ids;
|
||||
private final TreeMap<String, List<Integer>> kfz2ids;
|
||||
private final HashMap<String, List<Integer>> predial2ids;
|
||||
private final HashMap<String, Integer> zip2id;
|
||||
private final File file;
|
||||
|
||||
public OpenGeoDB(final File file, boolean lonlat) {
|
||||
public OpenGeoDBLocalization(final File file, boolean lonlat) {
|
||||
|
||||
this.file = file;
|
||||
this.locTypeHash2locType = new HashMap<Integer, String>();
|
||||
this.id2loc = new HashMap<Integer, Location>();
|
||||
this.id2locTypeHash = new HashMap<Integer, Integer>();
|
||||
this.locationName2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
|
||||
this.name2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
|
||||
this.kfz2ids = new TreeMap<String, List<Integer>>(insensitiveCollator);
|
||||
this.predial2ids = new HashMap<String, List<Integer>>();
|
||||
this.zip2id = new HashMap<String, Integer>();
|
||||
|
@ -123,10 +121,10 @@ public class OpenGeoDB {
|
|||
if (v[1].equals("500100000")) { // Ortsname
|
||||
id = Integer.parseInt(v[0]);
|
||||
h = removeQuotes(v[2]);
|
||||
List<Integer> l = this.locationName2ids.get(h);
|
||||
List<Integer> l = this.name2ids.get(h);
|
||||
if (l == null) l = new ArrayList<Integer>(1);
|
||||
l.add(id);
|
||||
this.locationName2ids.put(h, l);
|
||||
this.name2ids.put(h, l);
|
||||
Location loc = this.id2loc.get(id);
|
||||
if (loc != null) loc.setName(h);
|
||||
} else if (v[1].equals("500400000")) { // Vorwahl
|
||||
|
@ -181,22 +179,20 @@ public class OpenGeoDB {
|
|||
* @param anyname
|
||||
* @return
|
||||
*/
|
||||
public HashSet<Location> find(String anyname, boolean location, boolean locationexact, boolean kfz, boolean predial, boolean zip) {
|
||||
public HashSet<Location> find(String anyname, boolean locationexact) {
|
||||
HashSet<Integer> r = new HashSet<Integer>();
|
||||
List<Integer> c;
|
||||
if (location) {
|
||||
if (locationexact) {
|
||||
c = this.locationName2ids.get(anyname); if (c != null) r.addAll(c);
|
||||
} else {
|
||||
SortedMap<String, List<Integer>> cities = this.locationName2ids.tailMap(anyname);
|
||||
for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
|
||||
if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
|
||||
}
|
||||
if (locationexact) {
|
||||
c = this.name2ids.get(anyname); if (c != null) r.addAll(c);
|
||||
} else {
|
||||
SortedMap<String, List<Integer>> cities = this.name2ids.tailMap(anyname);
|
||||
for (Map.Entry<String, List<Integer>> e: cities.entrySet()) {
|
||||
if (e.getKey().toLowerCase().startsWith(anyname.toLowerCase())) r.addAll(e.getValue()); else break;
|
||||
}
|
||||
c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);
|
||||
c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);
|
||||
Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);
|
||||
}
|
||||
if (kfz) {c = this.kfz2ids.get(anyname); if (c != null) r.addAll(c);}
|
||||
if (predial) {c = this.predial2ids.get(anyname); if (c != null) r.addAll(c);}
|
||||
if (zip) {Integer i = this.zip2id.get(anyname); if (i != null) r.add(i);}
|
||||
HashSet<Location> a = new HashSet<Location>();
|
||||
for (Integer e: r) {
|
||||
Location w = this.id2loc.get(e);
|
||||
|
@ -213,10 +209,23 @@ public class OpenGeoDB {
|
|||
public Set<String> recommend(String s) {
|
||||
Set<String> a = new HashSet<String>();
|
||||
s = s.trim().toLowerCase();
|
||||
SortedMap<String, List<Integer>> t = this.locationName2ids.tailMap(s);
|
||||
SortedMap<String, List<Integer>> t = this.name2ids.tailMap(s);
|
||||
for (String r: t.keySet()) {
|
||||
if (r.startsWith(s)) a.add(r); else break;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
public String nickname() {
|
||||
return this.file.getName();
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return this.nickname().hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof Localization)) return false;
|
||||
return this.nickname().equals(((Localization) other).nickname());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
/**
|
||||
* OverarchingLocalization.java
|
||||
* Copyright 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
* first published 16.05.2010 on http://yacy.net
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file COPYING.LESSER.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.document.geolocalization;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
public class OverarchingLocalization implements Localization {
|
||||
|
||||
private Map<String, Localization> services;
|
||||
|
||||
/**
|
||||
* create a new overarching localization object
|
||||
*/
|
||||
public OverarchingLocalization() {
|
||||
this.services = new HashMap<String, Localization>();
|
||||
}
|
||||
|
||||
/**
|
||||
* add a localization service
|
||||
* @param nickname the nickname of the service
|
||||
* @param service the service
|
||||
*/
|
||||
public void addLocalization(String nickname, Localization service) {
|
||||
this.services.put(nickname, service);
|
||||
}
|
||||
|
||||
/**
|
||||
* remove a localization service
|
||||
* @param nickname
|
||||
*/
|
||||
public void removeLocalization(String nickname) {
|
||||
this.services.remove(nickname);
|
||||
}
|
||||
|
||||
/**
|
||||
* find (a set of) locations
|
||||
*/
|
||||
public Set<Location> find(String anyname, boolean locationexact) {
|
||||
Set<Location> locations = new HashSet<Location>();
|
||||
for (Localization service: this.services.values()) {
|
||||
locations.addAll(service.find(anyname, locationexact));
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
/**
|
||||
* recommend location names
|
||||
*/
|
||||
public Set<String> recommend(String s) {
|
||||
Set<String> recommendations = new HashSet<String>();
|
||||
for (Localization service: this.services.values()) {
|
||||
recommendations.addAll(service.recommend(s));
|
||||
}
|
||||
return recommendations;
|
||||
}
|
||||
|
||||
public String nickname() {
|
||||
return "oa";
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return this.nickname().hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (!(other instanceof Localization)) return false;
|
||||
return this.nickname().equals(((Localization) other).nickname());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user