*) Before the crawler retries downloading a URL, it now checks whether the server is already shutting down

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@554 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
theli 2005-08-17 11:36:48 +00:00
parent ea26b84eed
commit ea9a992f05

View File

@ -369,12 +369,20 @@ public final class plasmaCrawlWorker extends Thread {
URL redirectionUrl = new URL(url, (String) res.responseHeader.get(httpHeader.LOCATION)); URL redirectionUrl = new URL(url, (String) res.responseHeader.get(httpHeader.LOCATION));
// returning the used httpc // returning the used httpc
httpc.returnInstance(remote); httpc.returnInstance(remote);
remote = null; remote = null;
// restart crawling with new url // restart crawling with new url
log.logInfo("Redirection detected ('" + res.status + "') for URL " + url.toString() + log.logInfo("CRAWLER Redirection detected ('" + res.status + "') for URL " + url.toString() +
"\nRedirecting request to: " + redirectionUrl); "\nRedirecting request to: " + redirectionUrl);
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
log.logError("CRAWLER Retry of URL=" + url.toString() + " aborted because of server shutdown.");
return;
}
// retry crawling with new url
load(redirectionUrl, load(redirectionUrl,
name, name,
referer, referer,
@ -425,8 +433,21 @@ public final class plasmaCrawlWorker extends Thread {
} }
if (retryCrawling) { if (retryCrawling) {
// if we are already doing a shutdown we don't need to retry crawling
if (Thread.currentThread().isInterrupted()) {
log.logError("CRAWLER Retry of URL=" + url.toString() + " aborted because of server shutdown.");
return;
}
// returning the used httpc
httpc.returnInstance(remote);
remote = null;
// setting the retry counter to 1
if (crawlingRetryCount > 1) crawlingRetryCount = 1; if (crawlingRetryCount > 1) crawlingRetryCount = 1;
// retry crawling
load(url, load(url,
name, name,
referer, referer,