* add failsafe mechanism to domainlist retrieval

The domainlist is now saved locally; if none of the URLs given in network.unit.domainlist
  can be retrieved, the locally saved file from the last boot is used instead.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7289 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
f1ori 2010-11-02 17:57:48 +00:00
parent 70c95608d4
commit acd93b1b31
3 changed files with 38 additions and 18 deletions

View File

@ -345,6 +345,20 @@ public final class Switchboard extends serverSwitch {
// set the default segment names
setDefaultSegments();
// load domainList
try {
this.domainList = null;
if(!getConfig("network.unit.domainlist", "").equals("")) {
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
}
} catch (FileNotFoundException e) {
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
} catch (IOException e) {
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
}
// create a crawler
crawler = new CrawlSwitchboard(
networkName,
@ -824,15 +838,7 @@ public final class Switchboard extends serverSwitch {
}
*/
MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig("network.unit.domain", "global"));
try {
this.domainList = null;
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath());
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
} catch (FileNotFoundException e) {
} catch (IOException e) {
}
}
public void switchNetwork(final String networkDefinition) {
@ -917,13 +923,18 @@ public final class Switchboard extends serverSwitch {
this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));
// load domainList
try {
this.domainList = null;
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainList", ""), getAppPath().getAbsolutePath());
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
if(!getConfig("network.unit.domainlist", "").equals("")) {
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
this.domainList = new FilterEngine();
this.domainList.loadList(new BufferedReader(r), null);
}
} catch (FileNotFoundException e) {
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
} catch (IOException e) {
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
}
this.crawlStacker = new CrawlStacker(

View File

@ -22,10 +22,10 @@
package de.anomic.server;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
@ -43,13 +43,11 @@ import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.kelondro.workflow.WorkflowThread;
import de.anomic.search.Switchboard;
import de.anomic.server.serverAccessTracker.Track;
import de.anomic.server.serverCore.Session;
@ -573,8 +571,9 @@ public class serverSwitch {
* uri may be a URL or a filename with a path relative to the rootPath parameter
* @param uri url or filename
* @param rootPath searchpath for file
* @param file file to use when remote fetching fails (null if unused)
*/
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws IOException, FileNotFoundException {
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath, File file) throws IOException, FileNotFoundException {
if(uri.startsWith("http://") || uri.startsWith("https://")) {
String[] uris = uri.split(",");
for (String netdef: uris) {
@ -586,12 +585,22 @@ public class serverSwitch {
client.setHeader(reqHeader.entrySet());
byte[] data = client.GETbytes(uri);
if (data == null || data.length == 0) continue;
// save locally in case next fetch fails
if (file != null) {
FileOutputStream f = new FileOutputStream(file);
f.write(data);
f.close();
}
return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data)));
} catch (final Exception e) {
continue;
}
}
throw new FileNotFoundException();
if (file != null && file.exists()) {
return new FileReader(file);
} else {
throw new FileNotFoundException();
}
} else {
final File f = (uri.length() > 0 && uri.charAt(0) == '/') ? new File(uri) : new File(rootPath, uri);
if (f.exists()) {

View File

@ -447,7 +447,7 @@ public class HTTPClient {
byte[] content = null;
try {
execute(httpUriRequest);
if (httpResponse == null) return null;
if (httpResponse == null || httpResponse.getStatusLine().getStatusCode() != 200) return null;
// get the response body
final HttpEntity httpEntity = httpResponse.getEntity();
if (httpEntity != null) {