mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
* add failsafe mechanism to domainlist retrieval
The domainlist is saved locally; if none of the URLs given in network.unit.domainlist can be retrieved, the file from the last boot is used instead. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7289 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
70c95608d4
commit
acd93b1b31
|
@ -345,6 +345,20 @@ public final class Switchboard extends serverSwitch {
|
|||
// set the default segment names
|
||||
setDefaultSegments();
|
||||
|
||||
// load domainList
|
||||
try {
|
||||
this.domainList = null;
|
||||
if(!getConfig("network.unit.domainlist", "").equals("")) {
|
||||
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
|
||||
this.domainList = new FilterEngine();
|
||||
this.domainList.loadList(new BufferedReader(r), null);
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
|
||||
} catch (IOException e) {
|
||||
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
|
||||
}
|
||||
|
||||
// create a crawler
|
||||
crawler = new CrawlSwitchboard(
|
||||
networkName,
|
||||
|
@ -824,15 +838,7 @@ public final class Switchboard extends serverSwitch {
|
|||
}
|
||||
*/
|
||||
MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig("network.unit.domain", "global"));
|
||||
|
||||
try {
|
||||
this.domainList = null;
|
||||
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath());
|
||||
this.domainList = new FilterEngine();
|
||||
this.domainList.loadList(new BufferedReader(r), null);
|
||||
} catch (FileNotFoundException e) {
|
||||
} catch (IOException e) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void switchNetwork(final String networkDefinition) {
|
||||
|
@ -917,13 +923,18 @@ public final class Switchboard extends serverSwitch {
|
|||
this.webStructure = new WebStructureGraph(log, rankingPath, "LOCAL/010_cr/", getConfig("CRDist0Path", CRDistribution.CR_OWN), new File(queuesRoot, "webStructure.map"));
|
||||
|
||||
|
||||
// load domainList
|
||||
try {
|
||||
this.domainList = null;
|
||||
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainList", ""), getAppPath().getAbsolutePath());
|
||||
this.domainList = new FilterEngine();
|
||||
this.domainList.loadList(new BufferedReader(r), null);
|
||||
if(!getConfig("network.unit.domainlist", "").equals("")) {
|
||||
Reader r = getConfigFileFromWebOrLocally(getConfig("network.unit.domainlist", ""), getAppPath().getAbsolutePath(), new File(this.networkRoot, "domainlist.txt"));
|
||||
this.domainList = new FilterEngine();
|
||||
this.domainList.loadList(new BufferedReader(r), null);
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
log.logSevere("CONFIG: domainlist not found: " + e.getMessage());
|
||||
} catch (IOException e) {
|
||||
log.logSevere("CONFIG: error while retrieving domainlist: " + e.getMessage());
|
||||
}
|
||||
|
||||
this.crawlStacker = new CrawlStacker(
|
||||
|
|
|
@ -22,10 +22,10 @@
|
|||
package de.anomic.server;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
@ -43,13 +43,11 @@ import net.yacy.cora.protocol.Domains;
|
|||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.kelondro.workflow.BusyThread;
|
||||
import net.yacy.kelondro.workflow.WorkflowThread;
|
||||
|
||||
import de.anomic.search.Switchboard;
|
||||
import de.anomic.server.serverAccessTracker.Track;
|
||||
import de.anomic.server.serverCore.Session;
|
||||
|
||||
|
@ -573,8 +571,9 @@ public class serverSwitch {
|
|||
* file may be an url or a filename with path relative to rootPath parameter
|
||||
* @param file url or filename
|
||||
* @param rootPath searchpath for file
|
||||
* @param file file to use when remote fetching fails (null if unused)
|
||||
*/
|
||||
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath) throws IOException, FileNotFoundException {
|
||||
public Reader getConfigFileFromWebOrLocally(String uri, String rootPath, File file) throws IOException, FileNotFoundException {
|
||||
if(uri.startsWith("http://") || uri.startsWith("https://")) {
|
||||
String[] uris = uri.split(",");
|
||||
for (String netdef: uris) {
|
||||
|
@ -586,12 +585,22 @@ public class serverSwitch {
|
|||
client.setHeader(reqHeader.entrySet());
|
||||
byte[] data = client.GETbytes(uri);
|
||||
if (data == null || data.length == 0) continue;
|
||||
// save locally in case next fetch fails
|
||||
if (file != null) {
|
||||
FileOutputStream f = new FileOutputStream(file);
|
||||
f.write(data);
|
||||
f.close();
|
||||
}
|
||||
return new InputStreamReader(new BufferedInputStream(new ByteArrayInputStream(data)));
|
||||
} catch (final Exception e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
throw new FileNotFoundException();
|
||||
if (file != null && file.exists()) {
|
||||
return new FileReader(file);
|
||||
} else {
|
||||
throw new FileNotFoundException();
|
||||
}
|
||||
} else {
|
||||
final File f = (uri.length() > 0 && uri.charAt(0) == '/') ? new File(uri) : new File(rootPath, uri);
|
||||
if (f.exists()) {
|
||||
|
|
|
@ -447,7 +447,7 @@ public class HTTPClient {
|
|||
byte[] content = null;
|
||||
try {
|
||||
execute(httpUriRequest);
|
||||
if (httpResponse == null) return null;
|
||||
if (httpResponse == null || httpResponse.getStatusLine().getStatusCode() != 200) return null;
|
||||
// get the response body
|
||||
final HttpEntity httpEntity = httpResponse.getEntity();
|
||||
if (httpEntity != null) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user