mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) AbstractCrawlWorker.java: delete already downloaded data on crawling error
*) plasmaSwitchboard.java: log unexpected errors while parsing/indexing
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2552 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
4f9e42d5ed
commit
a0ddf2ec11
|
@@ -47,6 +47,7 @@
|
|||
|
||||
package de.anomic.plasma.crawler;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import de.anomic.index.indexURL;
|
||||
|
@@ -277,5 +278,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
|
|||
|
||||
// push it onto the stack
|
||||
this.sb.urlPool.errorURL.stackPushEntry(ee);
|
||||
|
||||
// delete the cache file
|
||||
File cacheFile = this.cacheManager.getCachePath(this.url);
|
||||
if (cacheFile.exists()) cacheFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -454,6 +454,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
|
|||
return load(crawlingRetryCount - 1);
|
||||
}
|
||||
if (failreason != null) {
|
||||
// add url into error db
|
||||
addURLtoErrorDB(failreason);
|
||||
}
|
||||
return null;
|
||||
|
|
|
@@ -1749,6 +1749,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
}
|
||||
}
|
||||
document = null;
|
||||
} catch (Exception e) {
|
||||
this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
|
||||
} catch (Error e) {
|
||||
this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
|
||||
} finally {
|
||||
checkInterruption();
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user