*) AbstractCrawlWorker.java: delete already downloaded data on crawling error

*) plasmaSwitchboard.java: log unexpected errors while parsing/indexing

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2552 6c8d7289-2bf4-0310-a012-ef5d649a1542
Author: theli
Date: 2006-09-12 04:50:12 +00:00
Parent: 4f9e42d5ed
Commit: a0ddf2ec11
3 changed files with 10 additions and 0 deletions

AbstractCrawlWorker.java

@@ -47,6 +47,7 @@
package de.anomic.plasma.crawler;
import java.io.File;
import java.io.IOException;
import de.anomic.index.indexURL;
@@ -277,5 +278,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
        // push it onto the stack
        this.sb.urlPool.errorURL.stackPushEntry(ee);
        // delete the cache file
        File cacheFile = this.cacheManager.getCachePath(this.url);
        if (cacheFile.exists()) cacheFile.delete();
    }
}
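
For readers outside the YaCy code base, the new error path boils down to this: once the failed URL has been pushed onto the error stack, any partially written cache file for that URL is deleted so it cannot later be mistaken for a complete download. Below is a minimal, self-contained sketch of just that cleanup step, assuming only that the cache location resolves to a plain java.io.File; the class and method names (CrawlErrorCleanupSketch, discardPartialDownload) are hypothetical and not part of YaCy.

import java.io.File;

// Minimal sketch (not YaCy code) of the error-path cleanup shown in the hunk above.
// The real worker resolves the cache location via this.cacheManager.getCachePath(this.url);
// here a plain java.io.File stands in for that lookup, so only the cleanup logic remains.
public class CrawlErrorCleanupSketch {

    // Remove a partially downloaded cache file after a crawl has failed.
    static void discardPartialDownload(File cacheFile) {
        // only touch the file system if something was actually written
        if (cacheFile != null && cacheFile.exists()) {
            if (!cacheFile.delete()) {
                System.err.println("could not delete partial download: " + cacheFile);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        File cacheFile = File.createTempFile("htcache-", ".tmp"); // stands in for getCachePath(url)
        discardPartialDownload(cacheFile);
        System.out.println("exists after cleanup: " + cacheFile.exists()); // prints false
    }
}

The exists() check mirrors the committed code: nothing is touched unless the worker actually started writing the response to disk.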

CrawlWorker.java

@@ -454,6 +454,7 @@ public final class CrawlWorker extends AbstractCrawlWorker {
                return load(crawlingRetryCount - 1);
            }
            if (failreason != null) {
                // add url into error db
                addURLtoErrorDB(failreason);
            }
            return null;
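
The comment added here marks the second half of a retry-then-record pattern: the worker first retries the download with a decremented crawlingRetryCount, and only when a failure reason remains does it record the URL in the error DB and give up. A rough sketch of that control flow with YaCy's types replaced by plain strings; load, crawlingRetryCount and addURLtoErrorDB mirror names from the diff, while fetch and the String return value are stand-ins invented for illustration.

// Sketch (not YaCy code) of the retry-then-record flow around the added comment.
public class RetryOrFailSketch {

    static String load(String url, int crawlingRetryCount) {
        String failreason = fetch(url);              // null would mean success
        if (failreason == null) return "content of " + url;

        if (crawlingRetryCount > 0) {
            // transient problem: try once more with one attempt fewer
            return load(url, crawlingRetryCount - 1);
        }
        if (failreason != null) {
            // add url into error db (here: just print it)
            addURLtoErrorDB(url, failreason);
        }
        return null;
    }

    static String fetch(String url) { return "connection refused"; }   // always fails in this sketch
    static void addURLtoErrorDB(String url, String reason) {
        System.err.println(url + " failed permanently: " + reason);
    }

    public static void main(String[] args) {
        System.out.println(load("http://example.org/", 2));            // prints null after three attempts
    }
}

Returning null after recording the failure matches the diff: the caller gets no content, and the failure reason is preserved for later inspection.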

plasmaSwitchboard.java

@@ -1749,6 +1749,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                    }
                }
                document = null;
            } catch (Exception e) {
                this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
            } catch (Error e) {
                this.log.logSevere("Unexpected exception while parsing/indexing URL ",e);
            } finally {
                checkInterruption();
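
The plasmaSwitchboard change wraps the parsing/indexing step so that both Exceptions and Errors are logged as severe instead of silently unwinding the indexing thread, and the finally clause still runs checkInterruption() so the thread can be shut down cleanly. The following is a sketch of the same guard pattern using java.util.logging in place of YaCy's own serverLog; parseAndIndex and the IndexingGuardSketch class are placeholders, and only the try/catch/finally shape corresponds to the committed code.

import java.util.logging.Level;
import java.util.logging.Logger;

// Sketch (not YaCy code) of the guard added around the parsing/indexing step.
public class IndexingGuardSketch {

    private static final Logger log = Logger.getLogger("PLASMA");

    static void processEntry(String url) throws InterruptedException {
        try {
            parseAndIndex(url);
        } catch (Exception e) {
            // recoverable problems: parser bugs, malformed documents, ...
            log.log(Level.SEVERE, "Unexpected exception while parsing/indexing URL " + url, e);
        } catch (Error e) {
            // VM-level problems (OutOfMemoryError, LinkageError, ...) are logged too,
            // so one bad document cannot silently end the indexing thread
            log.log(Level.SEVERE, "Unexpected error while parsing/indexing URL " + url, e);
        } finally {
            // give the switchboard a chance to shut the thread down cleanly
            checkInterruption();
        }
    }

    static void parseAndIndex(String url) { throw new RuntimeException("parser failure for " + url); }

    static void checkInterruption() throws InterruptedException {
        if (Thread.currentThread().isInterrupted()) throw new InterruptedException("shutdown in progress");
    }

    public static void main(String[] args) throws InterruptedException {
        processEntry("http://example.org/");  // logs the RuntimeException and returns normally
    }
}

Catching Error is normally discouraged, but in a long-running indexing loop it keeps an OutOfMemoryError or LinkageError triggered by a single document from killing the whole thread without a trace, which is exactly what the commit message describes as logging unexpected errors while parsing/indexing.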