From acd93b1b314d469c0a91e7215d21619898438378 Mon Sep 17 00:00:00 2001 From: f1ori Date: Tue, 2 Nov 2010 17:57:48 +0000 Subject: [PATCH] * add failsafe mechanism to domainlist retrieval domainlist is saved locally; if none of the given urls in network.unit.domainlist could be retrieved, the file from the last boot is used instead git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7289 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/search/Switchboard.java | 35 ++++++++++++------- source/de/anomic/server/serverSwitch.java | 19 +++++++--- .../yacy/cora/protocol/http/HTTPClient.java | 2 +- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 2044f1fba..19b802708 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -345,6 +345,20 @@ public final class Switchboard extends serverSwitch { // set the default segment names setDefaultSegments(); + // load domainList + try { + this.domainList = null; + if(!getConfig("network.unit.domainlist", "").equals("")) { + Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt")); + this.domainList = new FilterEngine(); + this.domainList.loadList(new BufferedReader(r), null); + } + } catch (FileNotFoundException e) { + log.logSevere("CONFIG: domainlist not found: " + e.getMessage()); + } catch (IOException e) { + log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage()); + } + // create a crawler crawler = new CrawlSwitchboard( networkName, @@ -824,15 +838,7 @@ public final class Switchboard extends serverSwitch { } */ MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? 
"-" : "/") + getConfig("network.unit.domain", "global")); - - try { - this.domainList = null; - Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath()); - this.domainList = new FilterEngine(); - this.domainList.loadList(new BufferedReader(r), null); - } catch (FileNotFoundException e) { - } catch (IOException e) { - } + } public void switchNetwork(final String networkDefinition) { @@ -917,13 +923,18 @@ public final class Switchboard extends serverSwitch { this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map")); + // load domainList try { this.domainList = null; - Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainList", ""), getAppPath().getAbsolutePath()); - this.domainList = new FilterEngine(); - this.domainList.loadList(new BufferedReader(r), null); + if(!getConfig("network.unit.domainlist", "").equals("")) { + Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt")); + this.domainList = new FilterEngine(); + this.domainList.loadList(new BufferedReader(r), null); + } } catch (FileNotFoundException e) { + log.logSevere("CONFIG: domainlist not found: " + e.getMessage()); } catch (IOException e) { + log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage()); } this.crawlStacker = new CrawlStacker( diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index d7c6f3fa0..812f624c3 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -22,10 +22,10 @@ package de.anomic.server; import java.io.BufferedInputStream; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileNotFoundException; +import 
java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStreamReader; @@ -43,13 +43,11 @@ import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.http.HTTPClient; -import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.workflow.BusyThread; import net.yacy.kelondro.workflow.WorkflowThread; -import de.anomic.search.Switchboard; import de.anomic.server.serverAccessTracker.Track; import de.anomic.server.serverCore.Session; @@ -573,8 +571,9 @@ public class serverSwitch { * file may be an url or a filename with path relative to rootPath parameter * @param file url or filename * @param rootPath searchpath for file + * @param file file to use when remote fetching fails (null if unused) */ - public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws IOException, FileNotFoundException { + public Reader getConfigFileFromWebOrLocally(String uri, String rootPath, File file) throws IOException, FileNotFoundException { if(uri.startsWith("http://") || uri.startsWith("https://")) { String[] uris = uri.split(","); for (String netdef: uris) { @@ -586,12 +585,22 @@ public class serverSwitch { client.setHeader(reqHeader.entrySet()); byte[] data = client.GETbytes(uri); if (data == null || data.length == 0) continue; + // save locally in case next fetch fails + if (file != null) { + FileOutputStream f = new FileOutputStream(file); + f.write(data); + f.close(); + } return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data))); } catch (final Exception e) { continue; } } - throw new FileNotFoundException(); + if (file != null && file.exists()) { + return new FileReader(file); + } else { + throw new FileNotFoundException(); + } } else { final File f = (uri.length() > 0 && uri.charAt(0) == '/') ? 
new File(uri) : new File(rootPath, uri); if (f.exists()) { diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index 03e25754c..ec38df626 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -447,7 +447,7 @@ public class HTTPClient { byte[] content = null; try { execute(httpUriRequest); - if (httpResponse == null) return null; + if (httpResponse == null || httpResponse.getStatusLine().getStatusCode() != 200) return null; // get the response body final HttpEntity httpEntity = httpResponse.getEntity(); if (httpEntity != null) {