*) AbstractCrawlWorker.java: delete already downloaded data on crawling error

*) plasmaSwitchboard.java: log unexpected errors while parsing/indexing

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2552 6c8d7289-2bf4-0310-a012-ef5d649a1542
Author: theli
Date: 2006-09-12 04:50:12 +00:00
Parent: 4f9e42d5ed
Commit: a0ddf2ec11
3 changed files with 10 additions and 0 deletions

AbstractCrawlWorker.java

@@ -47,6 +47,7 @@
package de.anomic.plasma.crawler;
import java.io.File;
import java.io.IOException;
import de.anomic.index.indexURL;
@@ -277,5 +278,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
        // push it onto the stack
        this.sb.urlPool.errorURL.stackPushEntry(ee);
        // delete the cache file
        File cacheFile = this.cacheManager.getCachePath(this.url);
        if (cacheFile.exists()) cacheFile.delete();
    }
}
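
For readers outside the YaCy code base, the new error path boils down to this: once the failed URL has been pushed onto the error stack, any partially written cache file for that URL is deleted so it cannot later be mistaken for a complete download. Below is a minimal, self-contained sketch of just that cleanup step, assuming only that the cache location resolves to a plain java.io.File; the class and method names (CrawlErrorCleanupSketch, discardPartialDownload) are hypothetical and not part of YaCy.

import java.io.File;

// Minimal sketch (not YaCy code) of the error-path cleanup shown in the hunk above.
// The real worker resolves the cache location via this.cacheManager.getCachePath(this.url);
// here a plain java.io.File stands in for that lookup, so only the cleanup logic remains.
public class CrawlErrorCleanupSketch {

    // Remove a partially downloaded cache file after a crawl has failed.
    static void discardPartialDownload(File cacheFile) {
        // only touch the file system if something was actually written
        if (cacheFile != null && cacheFile.exists()) {
            if (!cacheFile.delete()) {
                System.err.println("could not delete partial download: " + cacheFile);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        File cacheFile = File.createTempFile("htcache-", ".tmp"); // stands in for getCachePath(url)
        discardPartialDownload(cacheFile);
        System.out.println("exists after cleanup: " + cacheFile.exists()); // prints false
    }
}

The exists() check mirrors the committed code: nothing is touched unless the worker actually started writing the response to disk.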

CrawlWorker.java

@@ -454,6 +454,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
                return load(crawlingRetryCount - 1);
            }
            if (failreason != null) {
                // add url into error db
                addURLtoErrorDB(failreason);
            }
            return null;
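
The comment added here marks the second half of a retry-then-record pattern: the worker first retries the download with a decremented crawlingRetryCount, and only when a failure reason remains does it record the URL in the error DB and give up. A rough sketch of that control flow with YaCy's types replaced by plain strings; load, crawlingRetryCount and addURLtoErrorDB mirror names from the diff, while fetch and the String return value are stand-ins invented for illustration.

// Sketch (not YaCy code) of the retry-then-record flow around the added comment.
public class RetryOrFailSketch {

    static String load(String url, int crawlingRetryCount) {
        String failreason = fetch(url);              // null would mean success
        if (failreason == null) return "content of " + url;

        if (crawlingRetryCount > 0) {
            // transient problem: try once more with one attempt fewer
            return load(url, crawlingRetryCount - 1);
        }
        if (failreason != null) {
            // add url into error db (here: just print it)
            addURLtoErrorDB(url, failreason);
        }
        return null;
    }

    static String fetch(String url) { return "connection refused"; }   // always fails in this sketch
    static void addURLtoErrorDB(String url, String reason) {
        System.err.println(url + " failed permanently: " + reason);
    }

    public static void main(String[] args) {
        System.out.println(load("http://example.org/", 2));            // prints null after three attempts
    }
}

Returning null after recording the failure matches the diff: the caller gets no content, and the failure reason is preserved for later inspection.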

plasmaSwitchboard.java

@@ -1749,6 +1749,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                    }
                }
                document = null;
            } catch (Exception e) {
                this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
            } catch (Error e) {
                this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
            } finally {
                checkInterruption();
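
The plasmaSwitchboard change wraps the parsing/indexing step so that both Exceptions and Errors are logged as severe instead of silently unwinding the indexing thread, and the finally clause still runs checkInterruption() so the thread can be shut down cleanly. The following is a sketch of the same guard pattern using java.util.logging in place of YaCy's own serverLog; parseAndIndex and the IndexingGuardSketch class are placeholders, and only the try/catch/finally shape corresponds to the committed code.

import java.util.logging.Level;
import java.util.logging.Logger;

// Sketch (not YaCy code) of the guard added around the parsing/indexing step.
public class IndexingGuardSketch {

    private static final Logger log = Logger.getLogger("PLASMA");

    static void processEntry(String url) throws InterruptedException {
        try {
            parseAndIndex(url);
        } catch (Exception e) {
            // recoverable problems: parser bugs, malformed documents, ...
            log.log(Level.SEVERE, "Unexpected exception while parsing/indexing URL " + url, e);
        } catch (Error e) {
            // VM-level problems (OutOfMemoryError, LinkageError, ...) are logged too,
            // so one bad document cannot silently end the indexing thread
            log.log(Level.SEVERE, "Unexpected error while parsing/indexing URL " + url, e);
        } finally {
            // give the switchboard a chance to shut the thread down cleanly
            checkInterruption();
        }
    }

    static void parseAndIndex(String url) { throw new RuntimeException("parser failure for " + url); }

    static void checkInterruption() throws InterruptedException {
        if (Thread.currentThread().isInterrupted()) throw new InterruptedException("shutdown in progress");
    }

    public static void main(String[] args) throws InterruptedException {
        processEntry("http://example.org/");  // logs the RuntimeException and returns normally
    }
}

Catching Error is normally discouraged, but in a long-running indexing loop it keeps an OutOfMemoryError or LinkageError triggered by a single document from killing the whole thread without a trace, which is exactly what the commit message describes as logging unexpected errors while parsing/indexing.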