Mirror of https://github.com/yacy/yacy_search_server.git, synced 2024-09-19 00:01:41 +02:00

Commit 2602be8d1e (parent 31920385f7):

- removed the ZURL data structure and the ZURL data file
- replaced load-failure logging with information that is stored in Solr
- fixed a bug in the crawling of feeds: the must-match pattern is now applied to feed URLs to filter out URLs that shall not be in a wanted domain
- delegated URLs, which also used ZURLs, are now temporary objects in memory
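The recurring change across the hunks below is the error-reporting call. Both signatures appear verbatim in this diff; a side-by-side sketch (sb is the Switchboard instance):

    // before: ZURL.push needed a full balancer Request plus executor hash,
    // work date and retry counter, and wrote to the on-disk urlError4.db
    sb.crawlQueues.errorURL.push(
            request,                                  // net.yacy.crawler.retrieval.Request
            profile,                                  // CrawlProfile of the crawl
            ASCII.getBytes(sb.peers.mySeed().hash),   // executor peer hash
            new Date(),                               // workdate
            1,                                        // workcount (retries)
            FailCategory.FINAL_LOAD_CONTEXT,
            "url in blacklist", -1);                  // reason, http status code

    // after: ErrorCache.push takes only URL, profile, category, reason and
    // http status code; the failure is stored as a Solr document instead
    sb.crawlQueues.errorURL.push(
            request.url(), profile,
            FailCategory.FINAL_LOAD_CONTEXT,
            "url in blacklist", -1);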
@@ -37,6 +37,7 @@ import java.util.regex.PatternSyntaxException;
 import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.AnchorURL;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
@@ -44,8 +45,6 @@ import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.CrawlSwitchboard;
 import net.yacy.crawler.data.CrawlProfile;
-import net.yacy.crawler.data.ZURL.FailCategory;
-import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.SitemapImporter;
 import net.yacy.data.WorkTables;
 import net.yacy.document.Document;
@@ -392,7 +391,7 @@ public class Crawler_p {
             for (DigestURL u: rootURLs) {
                 hosthashes.add(ASCII.getBytes(u.hosthash()));
             }
-            sb.crawlQueues.errorURL.removeHosts(hosthashes, false);
+            sb.crawlQueues.errorURL.removeHosts(hosthashes);
             for (byte[] hosthash: hosthashes) {
                 try {
                     String deletequery = CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]";
@@ -440,24 +439,7 @@ public class Crawler_p {
             } else {
                 StringBuilder fr = new StringBuilder();
                 for (Map.Entry<DigestURL, String> failure: failurls.entrySet()) {
-                    sb.crawlQueues.errorURL.push(
-                            new Request(
-                                    sb.peers.mySeed().hash.getBytes(),
-                                    failure.getKey(),
-                                    null,
-                                    "",
-                                    new Date(),
-                                    profile.handle(),
-                                    0,
-                                    0,
-                                    0,
-                                    0),
-                            null,
-                            sb.peers.mySeed().hash.getBytes(),
-                            new Date(),
-                            1,
-                            FailCategory.FINAL_LOAD_CONTEXT,
-                            failure.getValue(), -1);
+                    sb.crawlQueues.errorURL.push(failure.getKey(), null, FailCategory.FINAL_LOAD_CONTEXT, failure.getValue(), -1);
                     fr.append(failure.getValue()).append('/');
                 }
@@ -439,7 +439,7 @@ public class HostBrowser {
                 FailType failType = errorDocs.get(entry.getKey());
                 if (failType == null) {
                     // maybe this is only in the errorURL
-                    prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(uri.hash()).anycause() : "unknown error");
+                    prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())).getFailReason() : "unknown error");
                 } else {
                     prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail");
                 }
@@ -32,16 +32,12 @@
 </colgroup>
 <tr class="TableHeader">
   <th>Time</th>
-  <th>Initiator</th>
-  <th>Executor</th>
   <th>URL</th>
   <th>Fail-Reason</th>
 </tr>
 #{list}#
 <tr class="TableCell#(dark)#Light::Dark#(/dark)#">
   <td>#[time]#</td>
-  <td>#[initiator]#</td>
-  <td>#[executor]#</td>
   <td><a href="#[url]#">#[url]#</a></td>
   <td>#[failreason]#</td>
 </tr>
@@ -24,15 +24,14 @@
 
 import java.util.ArrayList;
 import java.util.Date;
 
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.crawler.CrawlStacker;
-import net.yacy.crawler.data.ZURL;
 import net.yacy.peers.Seed;
 import net.yacy.search.Switchboard;
+import net.yacy.search.schema.CollectionConfiguration;
 import net.yacy.server.serverObjects;
 import net.yacy.server.serverSwitch;
@@ -73,27 +72,19 @@ public class IndexCreateParserErrors_p {
         }
         dark = true;
         DigestURL url;
-        byte[] initiatorHash, executorHash;
-        Seed initiatorSeed, executorSeed;
         int j=0;
-        ArrayList<ZURL.Entry> l = sb.crawlQueues.errorURL.list(showRejectedCount);
-        ZURL.Entry entry;
+        ArrayList<CollectionConfiguration.FailDoc> l = sb.crawlQueues.errorURL.list(showRejectedCount);
+        CollectionConfiguration.FailDoc entry;
         for (int i = l.size() - 1; i >= 0; i--) {
             entry = l.get(i);
             if (entry == null) continue;
-            url = entry.url();
+            url = entry.getDigestURL();
             if (url == null) continue;
 
-            initiatorHash = entry.initiator();
-            executorHash = entry.executor();
-            initiatorSeed = (initiatorHash == null) ? null : sb.peers.getConnected(ASCII.String(initiatorHash));
-            executorSeed = (executorHash == null) ? null : sb.peers.getConnected(ASCII.String(executorHash));
-            prop.putHTML("rejected_list_"+j+"_time", GenericFormatter.SIMPLE_FORMATTER.format(entry.workdate()));
-            prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
-            prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
-
+            prop.putHTML("rejected_list_"+j+"_time", GenericFormatter.SIMPLE_FORMATTER.format(new Date()));
             prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false));
 
-            String cause = entry.anycause();
+            String cause = entry.getFailReason();
             if (cause.startsWith(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER)) {
                 prop.put("rejected_list_"+j+"_failreason", "(<a href=\"/RegexTest.html?text=" + url.toNormalform(false) +
                     "&regex=" + cause.substring(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER.length()) + "\">test</a>) " + cause);
@@ -32,6 +32,7 @@
 import java.net.MalformedURLException;
 import java.util.Date;
 
+import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.federate.yacy.CacheStrategy;
@@ -127,7 +128,7 @@ public class QuickCrawlLink_p {
             final byte[] urlhash = crawlingStartURL.hash();
             indexSegment.fulltext().remove(urlhash);
             sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
-            sb.crawlQueues.errorURL.remove(urlhash);
+            sb.crawlQueues.errorURL.remove(ASCII.String(urlhash));
 
             // create crawling profile
             CrawlProfile pe = null;
@@ -30,11 +30,11 @@
 import java.io.IOException;
 
 import net.yacy.cora.document.encoding.ASCII;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.crawler.data.ResultURLs;
 import net.yacy.crawler.data.ResultURLs.EventOrigin;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.peers.Protocol;
 import net.yacy.peers.Seed;
@@ -161,14 +161,7 @@ public final class crawlReceipt {
         }
 
         sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
-        sb.crawlQueues.errorURL.push(
-                entry.toBalancerEntry(iam),
-                null,
-                youare.getBytes(),
-                null,
-                0,
-                FailCategory.FINAL_LOAD_CONTEXT,
-                result + ":" + reason, -1);
+        sb.crawlQueues.errorURL.push(entry.url(), null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1);
         //switchboard.noticeURL.remove(receivedUrlhash);
         prop.put("delay", "3600");
         return prop;
@@ -246,7 +246,7 @@ public final class search {
                     false,
                     indexSegment,
                     rankingProfile,
-                    header.get(RequestHeader.USER_AGENT, ""),
+                    header.get(HeaderFramework.USER_AGENT, ""),
                     false,
                     false,
                     0.0d,
@@ -310,7 +310,7 @@ public final class search {
                     false,
                     sb.index,
                     rankingProfile,
-                    header.get(RequestHeader.USER_AGENT, ""),
+                    header.get(HeaderFramework.USER_AGENT, ""),
                     false,
                     false,
                     0.0d,
@@ -25,14 +25,11 @@
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
 import java.io.IOException;
-import java.util.Date;
 
 import net.yacy.cora.date.GenericFormatter;
+import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.crawler.data.NoticedURL;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.peers.Protocol;
@@ -80,15 +77,7 @@ public class urls {
                     referrer = sb.getURL(entry.referrerhash());
 
                     // place url to notice-url db
-                    sb.crawlQueues.delegatedURL.push(
-                            entry,
-                            null,
-                            sb.peers.mySeed().hash.getBytes(),
-                            new Date(),
-                            0,
-                            FailCategory.FINAL_PROCESS_CONTEXT,
-                            "client=____________",
-                            -1);
+                    sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());
 
                     // create RSS entry
                     prop.put("item_" + c + "_title", "");
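Delegated URLs lose their persistent urlDelegated4.db backing in this hunk: the remote-crawl servlet now only records the handed-out URL in an in-memory map (the Map<String, DigestURL> field introduced in CrawlQueues below). A minimal sketch of the new bookkeeping, using only calls that appear in this diff:

    // record a URL that was handed out to a remote crawler; the key is the
    // String form of the 12-byte URL hash
    sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url());

    // later, CrawlQueues.exists(hash) answers HarvestProcess.DELEGATED via
    if (sb.crawlQueues.delegatedURL.containsKey(ASCII.String(hash))) { /* delegated */ }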
@@ -663,7 +663,7 @@ public class yacysearch {
                     authenticated,
                     indexSegment,
                     ranking,
-                    header.get(RequestHeader.USER_AGENT, ""),
+                    header.get(HeaderFramework.USER_AGENT, ""),
                     sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false)
                         && sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false)
                         && sb.peers.mySeed().getFlagAcceptRemoteIndex(),
source/net/yacy/cora/federate/solr/FailCategory.java (new file, 39 lines)

@@ -0,0 +1,39 @@
/**
 *  FailCategory
 *  Copyright 2013 by Michael Peter Christen
 *  First released 17.10.2013 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.federate.solr;

public enum FailCategory {
    // TEMPORARY categories are failure cases that should be tried again
    // FINAL categories are failure cases that are final and should not be tried again
    TEMPORARY_NETWORK_FAILURE(true, FailType.fail), // an entity could not be loaded
    FINAL_PROCESS_CONTEXT(false, FailType.excl),    // because of a processing context we do not want that url again (i.e. remote crawling)
    FINAL_LOAD_CONTEXT(false, FailType.excl),       // the crawler configuration does not want to load the entity
    FINAL_ROBOTS_RULE(true, FailType.excl),         // a remote server denies indexing or loading
    FINAL_REDIRECT_RULE(true, FailType.excl);       // the remote server redirects this page, thus disallowing reading of content

    public final boolean store;
    public final FailType failType;

    private FailCategory(boolean store, FailType failType) {
        this.store = store;
        this.failType = failType;
    }
}
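The two enum fields drive the new Solr-based failure handling: store decides whether a failure document is written to Solr at all (the removed ZURL.push body further below already used this flag the same way), and failType marks the document as a load failure (fail) or an exclusion (excl). An illustrative sketch with a hypothetical reportFailure helper; only the enum and its fields are from the source:

    // hypothetical consumer of the enum, for illustration only
    static void reportFailure(final FailCategory category, final String reason) {
        if (category.store) {
            // would build and submit a Solr error document tagged with category.failType
            System.out.println("store to Solr as " + category.failType + ": " + reason);
        } else {
            // kept only in the transient in-memory error cache
            System.out.println("transient only: " + reason);
        }
    }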
@@ -41,6 +41,7 @@ import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.AnchorURL;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.ftp.FTPClient;
@@ -49,9 +50,7 @@ import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.data.NoticedURL;
 import net.yacy.crawler.data.ResultURLs;
-import net.yacy.crawler.data.ZURL;
 import net.yacy.crawler.data.ResultURLs.EventOrigin;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.FTPLoader;
 import net.yacy.crawler.retrieval.HTTPLoader;
 import net.yacy.crawler.retrieval.Request;
@@ -65,6 +64,7 @@ import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.repository.FilterEngine;
 import net.yacy.search.Switchboard;
 import net.yacy.search.index.Segment;
+import net.yacy.search.schema.CollectionConfiguration;
 
 public final class CrawlStacker {
 
@@ -75,7 +75,7 @@ public final class CrawlStacker {
     private final ConcurrentLog log = new ConcurrentLog("STACKCRAWL");
     private final RobotsTxt robots;
     private final WorkflowProcessor<Request> requestQueue;
-    private final CrawlQueues nextQueue;
+    public final CrawlQueues nextQueue;
     private final CrawlSwitchboard crawler;
     private final Segment indexSegment;
     private final SeedDB peers;
@@ -151,7 +151,7 @@ public final class CrawlStacker {
             // if the url was rejected we store it into the error URL db
             if (rejectReason != null && !rejectReason.startsWith("double in")) {
                 final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
-                this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
+                this.nextQueue.errorURL.push(entry.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
             }
         } catch (final Exception e) {
             CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);
@@ -186,7 +186,7 @@ public final class CrawlStacker {
         this.indexSegment.fulltext().remove(urlhash);
         byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6);
         List<byte[]> hosthashes = new ArrayList<byte[]>(); hosthashes.add(hosthash);
-        this.nextQueue.errorURL.removeHosts(hosthashes, false);
+        this.nextQueue.errorURL.removeHosts(hosthashes);
         this.nextQueue.removeURL(urlhash);
         String u = url.toNormalform(true);
         if (u.endsWith("/")) {
@@ -198,7 +198,7 @@ public final class CrawlStacker {
                 final byte[] uh = new DigestURL(u).hash();
                 this.indexSegment.fulltext().remove(uh);
                 this.nextQueue.noticeURL.removeByURLHash(uh);
-                this.nextQueue.errorURL.remove(uh);
+                this.nextQueue.errorURL.remove(ASCII.String(uh));
             } catch (final MalformedURLException e1) {}
         }
 
@@ -246,7 +246,7 @@ public final class CrawlStacker {
         if (replace) {
             CrawlStacker.this.indexSegment.fulltext().remove(urlhash);
             cq.noticeURL.removeByURLHash(urlhash);
-            cq.errorURL.remove(urlhash);
+            cq.errorURL.remove(ASCII.String(urlhash));
         }
 
         // put entry on crawl stack
@@ -425,8 +425,8 @@ public final class CrawlStacker {
         if (dbocc != null) {
             // do double-check
             if (dbocc == HarvestProcess.ERRORS) {
-                final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash());
-                return "double in: errors (" + errorEntry.anycause() + ")";
+                final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
+                return "double in: errors (" + errorEntry.getFailReason() + ")";
             }
             return "double in: " + dbocc.toString();
         }
@@ -441,9 +441,9 @@ public final class CrawlStacker {
             return "double in: LURL-DB, oldDate = " + oldDate.toString();
         }
         if (dbocc == HarvestProcess.ERRORS) {
-            final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash());
-            if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.anycause());
-            return "double in: errors (" + errorEntry.anycause() + "), oldDate = " + oldDate.toString();
+            final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash()));
+            if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason());
+            return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString();
         }
         if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. ");
         return "double in: " + dbocc.toString() + ", oldDate = " + oldDate.toString();
@@ -40,17 +40,16 @@ import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.feed.Hit;
 import net.yacy.cora.document.feed.RSSFeed;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.protocol.ConnectionInfo;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.crawler.HarvestProcess;
 import net.yacy.crawler.data.NoticedURL.StackType;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.crawler.robots.RobotsTxtEntry;
-import net.yacy.kelondro.util.FileUtils;
 import net.yacy.kelondro.workflow.WorkflowJob;
 import net.yacy.peers.DHTSelection;
 import net.yacy.peers.Protocol;
@@ -59,19 +58,19 @@ import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.IndexingQueueEntry;
 import net.yacy.search.Switchboard;
 import net.yacy.search.SwitchboardConstants;
+import net.yacy.search.index.ErrorCache;
+import net.yacy.search.schema.CollectionConfiguration;
 
 public class CrawlQueues {
 
-    private static final String ERROR_DB_FILENAME = "urlError4.db";
-    private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
-
     private Switchboard sb;
     private ConcurrentLog log;
     private Map<Integer, Loader> workers; // mapping from url hash to Worker thread object
     private final ArrayList<String> remoteCrawlProviderHashes;
 
     public NoticedURL noticeURL;
-    public ZURL errorURL, delegatedURL;
+    public ErrorCache errorURL;
+    public Map<String, DigestURL> delegatedURL;
 
     public CrawlQueues(final Switchboard sb, final File queuePath) {
         this.sb = sb;
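The field swap above is the heart of the commit: the two kelondro-backed ZURL tables become an ErrorCache (errors live in Solr plus a small memory cache) and a plain map (delegated URLs are purely transient). The constructor wiring as it appears in the next hunks; note that a java.util.concurrent.ConcurrentHashMap import is implied even though it is not shown in this excerpt:

    this.errorURL = new ErrorCache(sb.index.fulltext());             // Solr-backed
    this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();  // in-memory only

One consequence: delegated and error entries no longer survive a restart, which is exactly the "temporary objects in memory" behaviour announced in the commit message.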
@@ -82,10 +81,8 @@ public class CrawlQueues {
         // start crawling management
         this.log.config("Starting Crawling Management");
         this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727);
-        FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(sb.index.fulltext(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        this.delegatedURL = new ZURL(sb.index.fulltext(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
-        try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time
+        this.errorURL = new ErrorCache(sb.index.fulltext());
+        this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
     }
 
     public void relocate(final File newQueuePath) {
@@ -95,10 +92,8 @@ public class CrawlQueues {
         this.remoteCrawlProviderHashes.clear();
 
         this.noticeURL = new NoticedURL(newQueuePath, this.sb.useTailCache, this.sb.exceed134217727);
-        FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(this.sb.index.fulltext(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-        this.delegatedURL = new ZURL(this.sb.index.fulltext(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
-        try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time
+        this.errorURL = new ErrorCache(this.sb.index.fulltext());
+        this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
     }
 
     public synchronized void close() {
@@ -114,8 +109,7 @@ public class CrawlQueues {
             }
         }
         this.noticeURL.close();
-        this.errorURL.close();
-        this.delegatedURL.close();
+        this.delegatedURL.clear();
     }
 
     public void clear() {
@@ -130,11 +124,7 @@ public class CrawlQueues {
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
         }
-        try {
-            this.delegatedURL.clear();
-        } catch (final IOException e) {
-            ConcurrentLog.logException(e);
-        }
+        this.delegatedURL.clear();
     }
 
     /**
@@ -143,7 +133,7 @@ public class CrawlQueues {
      * @return if the hash exists, the name of the database is returned, otherwise null is returned
      */
     public HarvestProcess exists(final byte[] hash) {
-        if (this.delegatedURL.exists(hash)) {
+        if (this.delegatedURL.containsKey(ASCII.String(hash))) {
             return HarvestProcess.DELEGATED;
         }
         if (this.errorURL.exists(hash)) {
@@ -164,7 +154,7 @@ public class CrawlQueues {
         assert hash != null && hash.length == 12;
         this.noticeURL.removeByURLHash(hash);
         this.delegatedURL.remove(hash);
-        this.errorURL.remove(hash);
+        this.errorURL.remove(ASCII.String(hash));
     }
 
     public DigestURL getURL(final byte[] urlhash) {
@@ -172,13 +162,13 @@ public class CrawlQueues {
         if (urlhash == null || urlhash.length == 0) {
             return null;
         }
-        ZURL.Entry ee = this.delegatedURL.get(urlhash);
-        if (ee != null) {
-            return ee.url();
+        DigestURL u = this.delegatedURL.get(ASCII.String(urlhash));
+        if (u != null) {
+            return u;
         }
-        ee = this.errorURL.get(urlhash);
+        CollectionConfiguration.FailDoc ee = this.errorURL.get(ASCII.String(urlhash));
         if (ee != null) {
-            return ee.url();
+            return ee.getDigestURL();
         }
         for (final Loader w: this.workers.values()) {
             if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) {
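All error-cache lookups now key on the String form of the 12-byte URL hash, which matches the id of the Solr error documents, instead of the raw byte[] used by the old kelondro index. The conversion idiom repeated throughout the diff:

    final byte[] urlhash = url.hash();          // 12-byte YaCy URL hash
    final String key = ASCII.String(urlhash);   // String key for map/Solr lookups

    final CollectionConfiguration.FailDoc fail = sb.crawlQueues.errorURL.get(key);
    if (fail != null) System.out.println(fail.getFailReason());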
@@ -639,14 +629,7 @@ public class CrawlQueues {
                     (robotsEntry = CrawlQueues.this.sb.robots.getEntry(this.request.url(), this.profile.getAgent())) != null &&
                     robotsEntry.isDisallowed(this.request.url())) {
                     //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
-                    CrawlQueues.this.errorURL.push(
-                            this.request,
-                            profile,
-                            ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
-                            new Date(),
-                            1,
-                            FailCategory.FINAL_ROBOTS_RULE,
-                            "denied by robots.txt", -1);
+                    CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.FINAL_ROBOTS_RULE, "denied by robots.txt", -1);
                     this.request.setStatus("worker-disallowed", WorkflowJob.STATUS_FINISHED);
                 } else {
                     // starting a load from the internet
@@ -679,28 +662,14 @@ public class CrawlQueues {
                     }
 
                     if (result != null) {
-                        CrawlQueues.this.errorURL.push(
-                                this.request,
-                                profile,
-                                ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
-                                new Date(),
-                                1,
-                                FailCategory.TEMPORARY_NETWORK_FAILURE,
-                                "cannot load: " + result, -1);
+                        CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "cannot load: " + result, -1);
                         this.request.setStatus("worker-error", WorkflowJob.STATUS_FINISHED);
                     } else {
                         this.request.setStatus("worker-processed", WorkflowJob.STATUS_FINISHED);
                     }
                 }
             } catch (final Exception e) {
-                CrawlQueues.this.errorURL.push(
-                        this.request,
-                        profile,
-                        ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
-                        new Date(),
-                        1,
-                        FailCategory.TEMPORARY_NETWORK_FAILURE,
-                        e.getMessage() + " - in worker", -1);
+                CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, e.getMessage() + " - in worker", -1);
                 ConcurrentLog.logException(e);
                 this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED);
             } finally {
source/net/yacy/crawler/data/ZURL.java (deleted file, 365 lines)

@@ -1,365 +0,0 @@
// plasmaCrawlZURL.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 15.03.2007 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package net.yacy.crawler.data;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.LinkedBlockingQueue;

import org.apache.solr.common.SolrInputDocument;

import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailType;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.retrieval.Request;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.index.Fulltext;

public class ZURL implements Iterable<ZURL.Entry> {

    private static ConcurrentLog log = new ConcurrentLog("REJECTED");

    private static final int EcoFSBufferSize = 2000;
    private static final int maxStackSize = 1000;

    public enum FailCategory {
        // TEMPORARY categories are such failure cases that should be tried again
        // FINAL categories are such failure cases that are final and should not be tried again
        TEMPORARY_NETWORK_FAILURE(true, FailType.fail), // an entity could not been loaded
        FINAL_PROCESS_CONTEXT(false, FailType.excl), // because of a processing context we do not want that url again (i.e. remote crawling)
        FINAL_LOAD_CONTEXT(false, FailType.excl), // the crawler configuration does not want to load the entity
        FINAL_ROBOTS_RULE(true, FailType.excl), // a remote server denies indexing or loading
        FINAL_REDIRECT_RULE(true, FailType.excl); // the remote server redirects this page, thus disallowing reading of content

        public final boolean store;
        public final FailType failType;

        private FailCategory(boolean store, FailType failType) {
            this.store = store;
            this.failType = failType;
        }
    }

    private final static Row rowdef = new Row(
            "String urlhash-" + Word.commonHashLength + ", " + // the url's hash
            "String executor-" + Word.commonHashLength + ", " + // the crawling executor
            "Cardinal workdate-8 {b256}, " + // the time when the url was last time tried to load
            "Cardinal workcount-4 {b256}, " + // number of load retries
            "String anycause-132, " + // string describing load failure
            "byte[] entry-" + Request.rowdef.objectsize, // extra space
            Base64Order.enhancedCoder
    );

    // the class object
    private Index urlIndex;
    private final Queue<byte[]> stack;
    private final Fulltext fulltext;

    protected ZURL(
            final Fulltext fulltext,
            final File cachePath,
            final String tablename,
            final boolean startWithEmptyFile,
            final boolean useTailCache,
            final boolean exceed134217727) {
        this.fulltext = fulltext;
        // creates a new ZURL in a file
        cachePath.mkdirs();
        final File f = new File(cachePath, tablename);
        if (startWithEmptyFile) {
            if (f.exists()) {
                if (f.isDirectory()) SplitTable.delete(cachePath, tablename); else FileUtils.deletedelete(f);
            }
        }
        try {
            this.urlIndex = new Table(f, rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727, true);
        } catch (final SpaceExceededException e) {
            try {
                this.urlIndex = new Table(f, rowdef, 0, 0, false, exceed134217727, true);
            } catch (final SpaceExceededException e1) {
                ConcurrentLog.logException(e1);
            }
        }
        //urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true);
        this.stack = new LinkedBlockingQueue<byte[]>();
    }

    protected void clear() throws IOException {
        if (this.urlIndex != null) this.urlIndex.clear();
        if (this.stack != null) this.stack.clear();
    }

    protected void close() {
        try {clear();} catch (final IOException e) {}
        if (this.urlIndex != null) this.urlIndex.close();
    }

    public boolean remove(final byte[] hash) {
        if (hash == null) return false;
        //System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " remove " + hash);
        try {
            Iterator<byte[]> i = ZURL.this.stack.iterator();
            while (i.hasNext()) {
                byte[] b = i.next();
                if (NaturalOrder.naturalOrder.equal(hash, b)) i.remove();
            }
            return this.urlIndex.delete(hash);
        } catch (final IOException e) {
            return false;
        }
    }

    public void removeHosts(final Iterable<byte[]> hosthashes, final boolean concurrent) {
        if (hosthashes == null) return;
        Thread t = new Thread() {
            public void run() {
                try {
                    Iterator<byte[]> i = ZURL.this.urlIndex.keys(true, null);
                    List<byte[]> r = new ArrayList<byte[]>();
                    while (i.hasNext()) {
                        byte[] b = i.next();
                        for (byte[] hosthash: hosthashes) {
                            if (NaturalOrder.naturalOrder.equal(hosthash, 0, b, 6, 6)) r.add(b);
                        }
                    }
                    for (byte[] b: r) ZURL.this.urlIndex.remove(b);
                    i = ZURL.this.stack.iterator();
                    while (i.hasNext()) {
                        byte[] b = i.next();
                        for (byte[] hosthash: hosthashes) {
                            if (NaturalOrder.naturalOrder.equal(hosthash, 0, b, 6, 6)) i.remove();
                        }
                    }
                } catch (final IOException e) {}
            }
        };
        if (concurrent) t.start(); else t.run();
    }

    public void push(
            final Request bentry,
            final CrawlProfile profile,
            final byte[] executor,
            final Date workdate,
            final int workcount,
            final FailCategory failCategory,
            String anycause,
            final int httpcode) {
        // assert executor != null; // null == proxy !
        assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name();
        if (exists(bentry.url().hash())) return; // don't insert double causes
        if (anycause == null) anycause = "unknown";
        final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
        final Entry entry = new Entry(bentry, executor, workdate, workcount, reason);
        put(entry);
        this.stack.add(entry.hash());
        if (!reason.startsWith("double")) log.info(bentry.url().toNormalform(true) + " - " + reason);
        if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
            // send the error to solr
            try {
                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
                this.fulltext.getDefaultConnector().add(errorDoc);
            } catch (final IOException e) {
                ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());
            }
        }
        while (this.stack.size() > maxStackSize) this.stack.poll();
    }

    @Override
    public Iterator<ZURL.Entry> iterator() {
        return new EntryIterator();
    }

    public ArrayList<ZURL.Entry> list(int max) {
        final ArrayList<ZURL.Entry> l = new ArrayList<ZURL.Entry>();
        DigestURL url;
        for (final ZURL.Entry entry: this) {
            if (entry == null) continue;
            url = entry.url();
            if (url == null) continue;
            l.add(entry);
            if (max-- <= 0) l.remove(0);
        }
        return l;
    }

    private class EntryIterator implements Iterator<ZURL.Entry> {
        private final Iterator<byte[]> hi;
        public EntryIterator() {
            this.hi = ZURL.this.stack.iterator();
        }

        @Override
        public boolean hasNext() {
            return this.hi.hasNext();
        }

        @Override
        public ZURL.Entry next() {
            return get(this.hi.next());
        }

        @Override
        public void remove() {
            this.hi.remove();
        }
    }

    public ZURL.Entry get(final byte[] urlhash) {
        try {
            if (this.urlIndex == null) return null;
            // System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " get " + urlhash);
            final Row.Entry entry = this.urlIndex.get(urlhash, false);
            if (entry == null) return null;
            return new Entry(entry);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }

    /**
     * private put (use push instead)
     * @param entry
     */
    private void put(final Entry entry) {
        // stores the values from the object variables into the database
        if (entry.stored) return;
        if (entry.bentry == null) return;
        final Row.Entry newrow = rowdef.newEntry();
        newrow.setCol(0, entry.bentry.url().hash());
        newrow.setCol(1, entry.executor);
        newrow.setCol(2, entry.workdate.getTime());
        newrow.setCol(3, entry.workcount);
        newrow.setCol(4, UTF8.getBytes(entry.anycause));
        newrow.setCol(5, entry.bentry.toRow().bytes());
        try {
            if (this.urlIndex != null) this.urlIndex.put(newrow);
            entry.stored = true;
        } catch (final Exception e) {
            ConcurrentLog.logException(e);
        }
    }

    boolean exists(final byte[] urlHash) {
        return this.urlIndex.has(urlHash);
    }

    public void clearStack() {
        this.stack.clear();
    }

    public int stackSize() {
        return this.stack.size();
    }

    public class Entry {

        private Request bentry; // the balancer entry
        private final byte[] executor; // the crawling executor
        private final Date workdate; // the time when the url was last time tried to load
        private final int workcount; // number of tryings
        private final String anycause; // string describing reason for load fail
        private boolean stored;

        private Entry(
                final Request bentry,
                final byte[] executor,
                final Date workdate,
                final int workcount,
                final String anycause) {
            // create new entry
            assert bentry != null;
            // assert executor != null; // null == proxy !
            this.bentry = bentry;
            this.executor = executor;
            this.workdate = (workdate == null) ? new Date() : workdate;
            this.workcount = workcount;
            this.anycause = (anycause == null) ? "" : anycause;
            this.stored = false;
        }

        private Entry(final Row.Entry entry) throws IOException {
            assert (entry != null);
            this.executor = entry.getColBytes(1, true);
            this.workdate = new Date(entry.getColLong(2));
            this.workcount = (int) entry.getColLong(3);
            this.anycause = entry.getColUTF8(4);
            this.bentry = new Request(Request.rowdef.newEntry(entry.getColBytes(5, false)));
            assert (Base64Order.enhancedCoder.equal(entry.getPrimaryKeyBytes(), this.bentry.url().hash()));
            this.stored = true;
            return;
        }

        public DigestURL url() {
            return this.bentry.url();
        }

        public byte[] initiator() {
            return this.bentry.initiator();
        }

        private byte[] hash() {
            // return a url-hash, based on the md5 algorithm
            // the result is a String of 12 bytes within a 72-bit space
            // (each byte has an 6-bit range)
            // that should be enough for all web pages on the world
            return this.bentry.url().hash();
        }

        public Date workdate() {
            return this.workdate;
        }

        public byte[] executor() {
            // return the creator's hash
            return this.executor;
        }

        public String anycause() {
            return this.anycause;
        }

    }

}
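The deleted class is replaced by net.yacy.search.index.ErrorCache, whose implementation is not part of this excerpt. Its surface can be inferred from the call sites in this diff; a hypothetical skeleton for orientation (signatures reconstructed from usage, not copied from the source):

    // inferred from call sites: new ErrorCache(fulltext), push(url, profile,
    // category, reason, httpcode), get/remove by String hash, removeHosts,
    // exists(byte[]), list(max), stackSize()
    public class ErrorCache {
        private final Fulltext fulltext;  // gives access to the Solr connector

        public ErrorCache(final Fulltext fulltext) {
            this.fulltext = fulltext;
        }

        public void push(final DigestURL url, final CrawlProfile profile,
                         final FailCategory failCategory, final String anycause,
                         final int httpcode) {
            // would build a Solr error document (failreason_s etc.) when
            // failCategory.store is set, as the removed ZURL.push did above
        }

        public CollectionConfiguration.FailDoc get(final String urlhash) { return null; }
        public boolean exists(final byte[] urlhash) { return false; }
        public void remove(final String urlhash) {}
        public void removeHosts(final Iterable<byte[]> hosthashes) {}
    }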
@@ -36,6 +36,7 @@ import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.protocol.ResponseHeader;
@@ -43,7 +44,6 @@ import net.yacy.cora.protocol.ftp.FTPClient;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.data.Latency;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.document.TextParser;
 import net.yacy.search.Switchboard;
 
@@ -156,7 +156,7 @@ public class FTPLoader {
         if (berr.size() > 0 || response == null) {
             // some error logging
             final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
-            this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
+            this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
             throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
         }
 
@@ -25,10 +25,9 @@
 package net.yacy.crawler.retrieval;
 
 import java.io.IOException;
-import java.util.Date;
 
-import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
@@ -37,7 +36,6 @@ import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.data.Latency;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.kelondro.io.ByteCount;
 import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;
@@ -79,10 +77,8 @@ public final class HTTPLoader {
 
     private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
 
-        byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash);
-
         if (retryCount < 0) {
-            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
+            this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
             throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 
@@ -98,7 +94,7 @@ public final class HTTPLoader {
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
-            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
@@ -145,7 +141,7 @@ public final class HTTPLoader {
             redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
 
             if (redirectionUrlString.isEmpty()) {
-                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
+                this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
                 throw new IOException("REJECTED EMPTY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             }
 
@@ -159,13 +155,13 @@ public final class HTTPLoader {
                 this.sb.webStructure.generateCitationReference(url, redirectionUrl);
 
                 if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
-                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
+                    this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
                 }
 
                 if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
                     // if we are already doing a shutdown we don't need to retry crawling
                     if (Thread.currentThread().isInterrupted()) {
-                        this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
+                        this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
                         throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                     }
 
@@ -174,11 +170,11 @@ public final class HTTPLoader {
                     return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
                 }
                 // we don't want to follow redirects
-                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+                this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
                 throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             } else if (responseBody == null) {
                 // no response, reject file
-                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
+                this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
                 throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             } else if (statusCode == 200 || statusCode == 203) {
                 // the transfer is ok
@@ -189,7 +185,7 @@ public final class HTTPLoader {
 
                 // check length again in case it was not possible to get the length before loading
                 if (maxFileSize >= 0 && contentLength > maxFileSize) {
-                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
                     throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
                 }
 
@@ -206,7 +202,7 @@ public final class HTTPLoader {
                 return response;
             } else {
                 // if the response does not have the right status code then reject the file
-                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
+                this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
                 throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             }
         }
@@ -42,6 +42,7 @@ import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.id.AnchorURL;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.HeaderFramework;
@@ -50,7 +51,6 @@ import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.crawler.data.Cache;
 import net.yacy.crawler.data.CrawlProfile;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.FTPLoader;
 import net.yacy.crawler.retrieval.FileLoader;
 import net.yacy.crawler.retrieval.HTTPLoader;
@@ -191,7 +191,7 @@ public final class LoaderDispatcher {
 
         // check if url is in blacklist
         if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
-            this.sb.crawlQueues.errorURL.push(request, crawlProfile, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request.url(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
@@ -97,6 +97,7 @@ import net.yacy.cora.document.feed.RSSReader;
 import net.yacy.cora.document.id.AnchorURL;
 import net.yacy.cora.document.id.DigestURL;
 import net.yacy.cora.document.id.MultiProtocolURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.federate.solr.Ranking;
 import net.yacy.cora.federate.solr.SchemaConfiguration;
 import net.yacy.cora.federate.solr.instance.RemoteInstance;
@@ -127,7 +128,6 @@ import net.yacy.crawler.data.ResultImages;
 import net.yacy.crawler.data.ResultURLs;
 import net.yacy.crawler.data.NoticedURL.StackType;
 import net.yacy.crawler.data.ResultURLs.EventOrigin;
-import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.crawler.robots.RobotsTxt;
@@ -1789,16 +1789,9 @@ public final class Switchboard extends serverSwitch {
         // if the noIndexReason is set, indexing is not allowed
         if ( noIndexReason != null ) {
             // log cause and close queue
-            final DigestURL referrerURL = response.referrerURL();
             //if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
-            addURLtoErrorDB(
-                response.url(),
-                response.profile(),
-                (referrerURL == null) ? null : referrerURL.hash(),
-                response.initiator(),
-                response.name(),
-                FailCategory.FINAL_PROCESS_CONTEXT,
-                noIndexReason);
+            // create a new errorURL DB entry
+            this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, noIndexReason, -1);
             // finish this entry
             return "not allowed: " + noIndexReason;
         }
@@ -1991,7 +1984,7 @@ public final class Switchboard extends serverSwitch {
 
     public int cleanupJobSize() {
         int c = 1; // "es gibt immer was zu tun" (there is always something to do)
-        if ( (this.crawlQueues.delegatedURL.stackSize() > 1000) ) {
+        if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
             c++;
         }
         if ( (this.crawlQueues.errorURL.stackSize() > 1000) ) {
@@ -2101,13 +2094,13 @@ public final class Switchboard extends serverSwitch {
 
         // clean up delegated stack
         checkInterruption();
-        if ( (this.crawlQueues.delegatedURL.stackSize() > 1000) ) {
+        if ( (this.crawlQueues.delegatedURL.size() > 1000) ) {
             if ( this.log.isFine() ) {
                 this.log.fine("Cleaning Delegated-URLs report stack, "
-                    + this.crawlQueues.delegatedURL.stackSize()
+                    + this.crawlQueues.delegatedURL.size()
                     + " entries on stack");
             }
-            this.crawlQueues.delegatedURL.clearStack();
+            this.crawlQueues.delegatedURL.clear();
         }
 
         // clean up error stack
@@ -2428,7 +2421,6 @@ public final class Switchboard extends serverSwitch {
 
     public IndexingQueueEntry parseDocument(final IndexingQueueEntry in) {
         in.queueEntry.updateStatus(Response.QUEUE_STATE_PARSING);
-
         Document[] documents = null;
         try {
             documents = parseDocument(in.queueEntry);
@@ -2439,7 +2431,7 @@ public final class Switchboard extends serverSwitch {
             }
             if ( documents == null ) {
                 return null;
             }
         }
         return new IndexingQueueEntry(in.queueEntry, documents, null);
     }
@@ -2465,14 +2457,8 @@ public final class Switchboard extends serverSwitch {
                 response.setContent(Cache.getContent(response.url().hash()));
                 if ( response.getContent() == null ) {
                     this.log.warn("the resource '" + response.url() + "' is missing in the cache.");
-                    addURLtoErrorDB(
-                        response.url(),
-                        response.profile(),
-                        response.referrerHash(),
-                        response.initiator(),
-                        response.name(),
-                        FailCategory.FINAL_LOAD_CONTEXT,
-                        "missing in cache");
+                    // create a new errorURL DB entry
+                    this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_LOAD_CONTEXT, "missing in cache", -1);
                     return null;
                 }
             }
@@ -2490,20 +2476,37 @@ public final class Switchboard extends serverSwitch {
             }
         } catch (final Parser.Failure e ) {
             this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
-            addURLtoErrorDB(
-                response.url(),
-                response.profile(),
-                response.referrerHash(),
-                response.initiator(),
-                response.name(),
-                FailCategory.FINAL_PROCESS_CONTEXT,
-                e.getMessage());
+            // create a new errorURL DB entry
+            this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1);
             return null;
         }
 
         final long parsingEndTime = System.currentTimeMillis();
 
         // put anchors on crawl stack
         final long stackStartTime = System.currentTimeMillis();
+        // check if the documents have valid urls; this is not a bug patch; it is possible that
+        // i.e. the result of a feed parsing results in documents from domains which shall be filtered by the crawl profile
+        if (response.profile() != null) {
+            ArrayList<Document> newDocs = new ArrayList<Document>();
+            for (Document doc: documents) {
+                String rejectReason = this.crawlStacker.checkAcceptance(doc.dc_source(), response.profile(), 1 /*depth is irrelevant here, we just make clear it's not the start url*/);
+                if (rejectReason == null) {
+                    newDocs.add(doc);
+                } else {
+                    // we consider these as fail urls to have a tracking of the problem
+                    if (!rejectReason.startsWith("double in")) {
+                        final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(response.profile().handle()));
+                        this.crawlStacker.nextQueue.errorURL.push(response.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
+                    }
+                }
+            }
+            if (newDocs.size() != documents.length) {
+                documents = newDocs.toArray(new Document[newDocs.size()]);
+            }
+        }
+
         // collect anchors within remaining documents
         if ((processCase == EventOrigin.PROXY_LOAD || processCase == EventOrigin.LOCAL_CRAWLING) &&
             (
                 response.profile() == null ||
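The inserted block above is the feed bugfix named in the commit message: every document produced by a parse (for example the items of an RSS feed) is re-checked against the crawl profile, so feed entries pointing outside the wanted domain are dropped and tracked as load failures instead of being crawled. An illustration with hypothetical values; checkAcceptance returns null for accepted URLs, and the URL and filter here are invented for the example:

    // crawl profile restricted to example.org, with an assumed mustmatch
    // filter of "https?://(www\\.)?example\\.org/.*"
    final String rejectReason = crawlStacker.checkAcceptance(
            new DigestURL("http://feeds.example.net/item/1"), profile, 1);
    if (rejectReason != null) {
        // e.g. prefixed with CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER,
        // which IndexCreateParserErrors_p turns into a /RegexTest.html link above
        System.out.println("feed url filtered: " + rejectReason);
    }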
@@ -2592,14 +2595,8 @@ public final class Switchboard extends serverSwitch {
             if (!(profile.indexUrlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN || profile.indexUrlMustMatchPattern().matcher(urls).matches()) ||
                 (profile.indexUrlMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.indexUrlMustNotMatchPattern().matcher(urls).matches())) {
                 if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
-                addURLtoErrorDB(
-                    in.queueEntry.url(),
-                    profile,
-                    in.queueEntry.referrerHash(),
-                    in.queueEntry.initiator(),
-                    in.queueEntry.name(),
-                    FailCategory.FINAL_PROCESS_CONTEXT,
-                    "indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
+                // create a new errorURL DB entry
+                this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern(), -1);
                 return new IndexingQueueEntry(in.queueEntry, in.documents, null);
             }
 
@ -2608,27 +2605,15 @@ public final class Switchboard extends serverSwitch {
docloop: for (final Document document : in.documents) {
if (document.indexingDenied() && profile.obeyHtmlRobotsNoindex()) {
if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule");
addURLtoErrorDB(
in.queueEntry.url(),
profile,
in.queueEntry.referrerHash(),
in.queueEntry.initiator(),
in.queueEntry.name(),
FailCategory.FINAL_PROCESS_CONTEXT,
"denied by document-attached noindexing rule");
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by document-attached noindexing rule", -1);
continue docloop;
}
if (!(profile.indexContentMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN || profile.indexContentMustMatchPattern().matcher(document.getTextString()).matches()) ||
(profile.indexContentMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.indexContentMustNotMatchPattern().matcher(document.getTextString()).matches())) {
if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern());
addURLtoErrorDB(
in.queueEntry.url(),
profile,
in.queueEntry.referrerHash(),
in.queueEntry.initiator(),
in.queueEntry.name(),
FailCategory.FINAL_PROCESS_CONTEXT,
"indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern());
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern(), -1);
continue docloop;
}
doclist.add(document);

@ -2705,30 +2690,18 @@ public final class Switchboard extends serverSwitch {

if (condenser == null || (document.indexingDenied() && profile.obeyHtmlRobotsNoindex())) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
addURLtoErrorDB(
url,
profile,
(referrerURL == null) ? null : referrerURL.hash(),
queueEntry.initiator(),
dc_title,
FailCategory.FINAL_PROCESS_CONTEXT,
"denied by rule in document, process case=" + processCase);
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(url, profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by rule in document, process case=" + processCase, -1);
return;
}

if ( profile != null && !profile.indexText() && !profile.indexMedia() ) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
addURLtoErrorDB(
url,
profile,
(referrerURL == null) ? null : referrerURL.hash(),
queueEntry.initiator(),
dc_title,
FailCategory.FINAL_LOAD_CONTEXT,
"denied by profile rule, process case="
+ processCase
+ ", profile name = "
+ profile.collectionName());
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(url, profile, FailCategory.FINAL_LOAD_CONTEXT, "denied by profile rule, process case="
+ processCase
+ ", profile name = "
+ profile.collectionName(), -1);
return;
}

@ -2906,7 +2879,7 @@ public final class Switchboard extends serverSwitch {
// remove the document from the error-db
// (the host hash occupies bytes 6..11 of the 12-byte url hash; cf. the offset used in ErrorCache.removeHosts below)
byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6);
List<byte[]> hosthashes = new ArrayList<byte[]>(); hosthashes.add(hosthash);
this.crawlQueues.errorURL.removeHosts(hosthashes, false);
this.crawlQueues.errorURL.removeHosts(hosthashes);
this.crawlQueues.removeURL(urlhash);

// get a scraper to get the title

@ -3373,31 +3346,6 @@ public final class Switchboard extends serverSwitch {
return hasDoneSomething;
}

private void addURLtoErrorDB(
final DigestURL url,
final CrawlProfile profile,
final byte[] referrerHash,
final byte[] initiator,
final String name,
final FailCategory failCategory,
final String failreason) {
// assert initiator != null; // null == proxy
// create a new errorURL DB entry
final Request bentry =
new Request(
initiator,
url,
referrerHash,
(name == null) ? "" : name,
new Date(),
null,
0,
0,
0,
0);
this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
}

public final void heuristicSite(final SearchEvent searchEvent, final String host) {
new Thread() {
@Override


source/net/yacy/search/index/ErrorCache.java (new file, 173 lines)

@ -0,0 +1,173 @@
/**
 *  ErrorCache
 *  Copyright 2013 by Michael Peter Christen
 *  First released 17.10.2013 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.search.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;

import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;

public class ErrorCache {

    private static ConcurrentLog log = new ConcurrentLog("REJECTED");
    private static final int maxStackSize = 1000;

    // the class object
    private final LinkedHashMap<String, CollectionConfiguration.FailDoc> stack;
    private final Fulltext fulltext;

    public ErrorCache(final Fulltext fulltext) {
        this.fulltext = fulltext;
        this.stack = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
        try {
            // fill the stack with the latest fail documents from Solr
            final SolrQuery params = new SolrQuery();
            params.setParam("defType", "edismax");
            params.setStart(0);
            params.setRows(100);
            params.setFacet(false);
            params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
            params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
            QueryResponse rsp = fulltext.getDefaultConnector().getResponseByParams(params);
            SolrDocumentList docList = rsp == null ? null : rsp.getResults();
            if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) {
                CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(docList.get(i));
                this.stack.put(ASCII.String(failDoc.getDigestURL().hash()), failDoc);
            }
        } catch (final Throwable e) {
            // pre-filling the stack is best-effort; start with an empty cache if Solr is not reachable
        }
    }

    public void clear() throws IOException {
        if (this.stack != null) this.stack.clear();
        this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
    }

    public void remove(final String hash) {
        if (hash == null) return;
        this.stack.remove(hash);
        try {
            this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + hash + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
        } catch (final IOException e) {
            return;
        }
    }

    public void removeHosts(final Iterable<byte[]> hosthashes) {
        if (hosthashes == null) return;
        try {
            for (byte[] hosthash : hosthashes) {
                this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
            }
            Iterator<String> i = this.stack.keySet().iterator();
            while (i.hasNext()) {
                String b = i.next();
                // the host hash is stored in bytes 6..11 of the url hash
                for (byte[] hosthash : hosthashes) {
                    if (NaturalOrder.naturalOrder.equal(hosthash, 0, ASCII.getBytes(b), 6, 6)) i.remove();
                }
            }
        } catch (final IOException e) {
        }
    }

    public void push(final DigestURL url, final CrawlProfile profile, final FailCategory failCategory, String anycause, final int httpcode) {
        assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name();
        if (exists(url.hash()))
            return; // don't insert double causes
        if (anycause == null) anycause = "unknown";
        final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
        if (!reason.startsWith("double")) log.info(url.toNormalform(true) + " - " + reason);
        CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(
                url, profile == null ? null : profile.collections(),
                failCategory.name() + " " + reason, failCategory.failType,
                httpcode);
        this.stack.put(ASCII.String(url.hash()), failDoc);
        if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
            // send the error to solr
            try {
                SolrInputDocument errorDoc = failDoc.toSolr(this.fulltext.getDefaultConfiguration());
                this.fulltext.getDefaultConnector().add(errorDoc);
            } catch (final IOException e) {
                ConcurrentLog.warn("SOLR", "failed to send error " + url.toNormalform(true) + " to solr: " + e.getMessage());
            }
        }
        // evict the oldest entries until the stack fits again
        while (this.stack.size() > maxStackSize)
            this.stack.remove(this.stack.keySet().iterator().next());
    }

    public ArrayList<CollectionConfiguration.FailDoc> list(int max) {
        final ArrayList<CollectionConfiguration.FailDoc> l = new ArrayList<CollectionConfiguration.FailDoc>();
        Iterator<CollectionConfiguration.FailDoc> fdi = this.stack.values().iterator();
        // skip older entries so that at most the latest max entries are returned
        for (int i = 0; i < this.stack.size() - max; i++) fdi.next();
        while (fdi.hasNext()) l.add(fdi.next());
        return l;
    }

    public CollectionConfiguration.FailDoc get(final String urlhash) {
        CollectionConfiguration.FailDoc fd = this.stack.get(urlhash);
        if (fd != null) return fd;
        try {
            SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(urlhash);
            if (doc == null) return null;
            return new CollectionConfiguration.FailDoc(doc);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }

    public boolean exists(final byte[] urlHash) {
        try {
            return this.fulltext.getDefaultConnector().existsByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
        } catch (IOException e) {
            return false;
        }
    }

    public void clearStack() {
        this.stack.clear();
    }

    public int stackSize() {
        return this.stack.size();
    }

}

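A minimal usage sketch for the new ErrorCache (not part of the commit): the Fulltext must come from a running Segment and the URL is hypothetical, so this is illustrative rather than standalone.

import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.search.index.ErrorCache;
import net.yacy.search.index.Fulltext;
import net.yacy.search.schema.CollectionConfiguration;

public class ErrorCacheUsage {
    public static void demo(final Fulltext fulltext) throws Exception {
        // the constructor pre-fills the in-memory stack from Solr
        final ErrorCache errorURL = new ErrorCache(fulltext);
        final DigestURL url = new DigestURL("http://example.org/missing"); // hypothetical URL
        // one call replaces the old ZURL/addURLtoErrorDB plumbing; -1 means no http status available
        errorURL.push(url, null, FailCategory.FINAL_LOAD_CONTEXT, "missing in cache", -1);
        // the fail reason is read back by url hash, as HostBrowser now does
        final CollectionConfiguration.FailDoc fd = errorURL.get(ASCII.String(url.hash()));
        if (fd != null) System.out.println(fd.getFailReason());
    }
}
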
@ -80,6 +80,7 @@ import net.yacy.kelondro.util.Bitfield;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segment.ReferenceReport;
import net.yacy.search.index.Segment.ReferenceReportCache;
import net.yacy.search.query.QueryParams;
import net.yacy.search.schema.WebgraphConfiguration.Subgraph;

import org.apache.solr.common.SolrDocument;

@ -1195,34 +1196,73 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
        return il;
    }
    */

    /**
     * register an entry as error document
     * @param digestURI
     * @param collections
     * @param failReason
     * @param failType
     * @param httpstatus
     * @throws IOException
     */
    public SolrInputDocument err(final DigestURL digestURI, final Map<String, Pattern> collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
        boolean allAttr = this.isEmpty();
        assert allAttr || contains(CollectionSchema.failreason_s);

        final SolrInputDocument doc = new SolrInputDocument();
        String url = addURIAttributes(doc, allAttr, digestURI, Response.docType(digestURI));
        if (allAttr || contains(CollectionSchema.load_date_dt)) add(doc, CollectionSchema.load_date_dt, new Date());

        // fail reason and status
        if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, failReason);
        if (allAttr || contains(CollectionSchema.failtype_s)) add(doc, CollectionSchema.failtype_s, failType.name());
        if (allAttr || contains(CollectionSchema.httpstatus_i)) add(doc, CollectionSchema.httpstatus_i, httpstatus);
        if ((allAttr || contains(CollectionSchema.collection_sxt)) && collections != null && collections.size() > 0) {
            List<String> cs = new ArrayList<String>();
            for (Map.Entry<String, Pattern> e: collections.entrySet()) {
                if (e.getValue().matcher(url).matches()) cs.add(e.getKey());
            }
            add(doc, CollectionSchema.collection_sxt, cs);
        }
        return doc;
    }

    public static class FailDoc {
        DigestURL digestURL;
        final Map<String, Pattern> collections;
        final String failReason;
        final FailType failType;
        final int httpstatus;
        final Date failtime;
        public FailDoc(final DigestURL digestURL, final Map<String, Pattern> collections, final String failReason, final FailType failType, final int httpstatus) {
            this.digestURL = digestURL;
            this.collections = collections;
            this.failReason = failReason;
            this.failType = failType;
            this.httpstatus = httpstatus;
            this.failtime = new Date();
        }
        public FailDoc(final SolrDocument doc) {
            try {
                this.digestURL = new DigestURL((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
            } catch (MalformedURLException e) {
                this.digestURL = null;
            }
            this.collections = new HashMap<String, Pattern>();
            Collection<Object> c = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName());
            if (c != null) for (Object cn: c) this.collections.put((String) cn, QueryParams.catchall_pattern);
            this.failReason = (String) doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
            this.failType = FailType.valueOf((String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()));
            final Integer status = (Integer) doc.getFieldValue(CollectionSchema.httpstatus_i.getSolrFieldName());
            this.httpstatus = status == null ? -1 : status.intValue();
            this.failtime = (Date) doc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
        }
        public DigestURL getDigestURL() {
            return digestURL;
        }
        public Map<String, Pattern> getCollections() {
            return collections;
        }
        public String getFailReason() {
            return failReason;
        }
        public FailType getFailType() {
            return failType;
        }
        public int getHttpstatus() {
            return httpstatus;
        }
        public SolrInputDocument toSolr(CollectionConfiguration configuration) {
            boolean allAttr = configuration.isEmpty();
            assert allAttr || configuration.contains(CollectionSchema.failreason_s);

            final SolrInputDocument doc = new SolrInputDocument();
            String url = configuration.addURIAttributes(doc, allAttr, this.getDigestURL(), Response.docType(this.getDigestURL()));
            if (allAttr || configuration.contains(CollectionSchema.load_date_dt)) configuration.add(doc, CollectionSchema.load_date_dt, new Date());

            // fail reason and status
            if (allAttr || configuration.contains(CollectionSchema.failreason_s)) configuration.add(doc, CollectionSchema.failreason_s, this.getFailReason());
            if (allAttr || configuration.contains(CollectionSchema.failtype_s)) configuration.add(doc, CollectionSchema.failtype_s, this.getFailType().name());
            if (allAttr || configuration.contains(CollectionSchema.httpstatus_i)) configuration.add(doc, CollectionSchema.httpstatus_i, this.getHttpstatus());
            if ((allAttr || configuration.contains(CollectionSchema.collection_sxt)) && this.getCollections() != null && this.getCollections().size() > 0) {
                List<String> cs = new ArrayList<String>();
                for (Map.Entry<String, Pattern> e: this.getCollections().entrySet()) {
                    if (e.getValue().matcher(url).matches()) cs.add(e.getKey());
                }
                configuration.add(doc, CollectionSchema.collection_sxt, cs);
            }
            return doc;
        }
    }

}

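The fail documents written by err() and FailDoc.toSolr() make load failures queryable in Solr. A small sketch of the query pattern the new code relies on (the same failreason_s and host_id_s clauses appear in ErrorCache above); the host hash value would be hypothetical:

import org.apache.solr.client.solrj.SolrQuery;

public class FailDocQuery {
    // build a query for all fail documents of one host
    public static SolrQuery failDocsForHost(final String hosthash) {
        final SolrQuery q = new SolrQuery();
        // failreason_s:[* TO *] selects exactly the documents that carry a fail reason
        q.setQuery("host_id_s:\"" + hosthash + "\" AND failreason_s:[* TO *]");
        q.setRows(100);
        return q;
    }
}
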
@ -40,6 +40,7 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.AnchorURL;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;

@ -48,8 +49,6 @@ import net.yacy.cora.util.ByteArray;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.NumberTools;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.data.ZURL.FailCategory;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.WordTokenizer;

@ -59,6 +58,7 @@ import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;

@SuppressWarnings("unused")
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
public ContentDomain type;
public DigestURL href, source;

@ -260,7 +260,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS

// check if url is in blacklist
if (Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile())) {
Switchboard.getSwitchboard().crawlQueues.errorURL.push(new Request(url, null), null, ASCII.getBytes(Switchboard.getSwitchboard().peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
Switchboard.getSwitchboard().crawlQueues.errorURL.push(url, null, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
ConcurrentLog.fine("snippet fetch", "MEDIA-SNIPPET Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
isBlacklisted = true;
}