From 2602be8d1e756d6fac20182916706b155d79b34f Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 17 Sep 2013 15:27:02 +0200 Subject: [PATCH] - removed the ZURL data structure and the ZURL data file - replaced load-failure logging with failure information stored in Solr - fixed a bug in the crawling of feeds: the must-match pattern is now also applied to feed urls so that urls outside the wanted domains are filtered out - delegatedURLs, which also used ZURL, are now temporary objects in memory --- htroot/Crawler_p.java | 24 +- htroot/HostBrowser.java | 2 +- htroot/IndexCreateParserErrors_p.html | 4 - htroot/IndexCreateParserErrors_p.java | 25 +- htroot/QuickCrawlLink_p.java | 3 +- htroot/yacy/crawlReceipt.java | 11 +- htroot/yacy/search.java | 4 +- htroot/yacy/urls.java | 13 +- htroot/yacysearch.java | 2 +- .../yacy/cora/federate/solr/FailCategory.java | 39 ++ source/net/yacy/crawler/CrawlStacker.java | 24 +- source/net/yacy/crawler/data/CrawlQueues.java | 73 +--- source/net/yacy/crawler/data/ZURL.java | 365 ------------------ .../net/yacy/crawler/retrieval/FTPLoader.java | 4 +- .../yacy/crawler/retrieval/HTTPLoader.java | 24 +- .../net/yacy/repository/LoaderDispatcher.java | 4 +- source/net/yacy/search/Switchboard.java | 152 +++----- source/net/yacy/search/index/ErrorCache.java | 173 +++++++++ .../schema/CollectionConfiguration.java | 98 +++-- .../net/yacy/search/snippet/MediaSnippet.java | 6 +- 20 files changed, 401 insertions(+), 649 deletions(-) create mode 100644 source/net/yacy/cora/federate/solr/FailCategory.java delete mode 100644 source/net/yacy/crawler/data/ZURL.java create mode 100644 source/net/yacy/search/index/ErrorCache.java diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index e972b5fa0..e2d72fe1b 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -37,6 +37,7 @@ import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; @@ -44,8 +45,6 @@ import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.SpaceExceededException; import net.yacy.crawler.CrawlSwitchboard; import net.yacy.crawler.data.CrawlProfile; -import net.yacy.crawler.data.ZURL.FailCategory; -import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.SitemapImporter; import net.yacy.data.WorkTables; import net.yacy.document.Document; @@ -392,7 +391,7 @@ public class Crawler_p { for (DigestURL u: rootURLs) { hosthashes.add(ASCII.getBytes(u.hosthash())); } - sb.crawlQueues.errorURL.removeHosts(hosthashes, false); + sb.crawlQueues.errorURL.removeHosts(hosthashes); for (byte[] hosthash: hosthashes) { try { String deletequery = CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]"; @@ -440,24 +439,7 @@ } else { StringBuilder fr = new StringBuilder(); for (Map.Entry<DigestURL, String> failure: failurls.entrySet()) { - sb.crawlQueues.errorURL.push( - new Request( - sb.peers.mySeed().hash.getBytes(), - failure.getKey(), - null, - "", - new Date(), - profile.handle(), - 0, - 0, - 0, - 0), - null, - sb.peers.mySeed().hash.getBytes(), - new Date(), - 1, - FailCategory.FINAL_LOAD_CONTEXT, - failure.getValue(), -1); +
sb.crawlQueues.errorURL.push(failure.getKey(), null, FailCategory.FINAL_LOAD_CONTEXT, failure.getValue(), -1); fr.append(failure.getValue()).append('/'); } diff --git a/htroot/HostBrowser.java b/htroot/HostBrowser.java index 4b4df0c22..c00352f3f 100644 --- a/htroot/HostBrowser.java +++ b/htroot/HostBrowser.java @@ -439,7 +439,7 @@ public class HostBrowser { FailType failType = errorDocs.get(entry.getKey()); if (failType == null) { // maybe this is only in the errorURL - prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(uri.hash()).anycause() : "unknown error"); + prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())).getFailReason() : "unknown error"); } else { prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail"); } diff --git a/htroot/IndexCreateParserErrors_p.html b/htroot/IndexCreateParserErrors_p.html index 4ac85ed24..1e463e16a 100644 --- a/htroot/IndexCreateParserErrors_p.html +++ b/htroot/IndexCreateParserErrors_p.html @@ -32,16 +32,12 @@ Time - Initiator - Executor URL Fail-Reason #{list}# #[time]# - #[initiator]# - #[executor]# #[url]# #[failreason]# diff --git a/htroot/IndexCreateParserErrors_p.java b/htroot/IndexCreateParserErrors_p.java index 6a10f44de..acbb9bab3 100644 --- a/htroot/IndexCreateParserErrors_p.java +++ b/htroot/IndexCreateParserErrors_p.java @@ -24,15 +24,14 @@ import java.util.ArrayList; +import java.util.Date; import net.yacy.cora.date.GenericFormatter; -import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.protocol.RequestHeader; import net.yacy.crawler.CrawlStacker; -import net.yacy.crawler.data.ZURL; -import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.schema.CollectionConfiguration; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -73,27 +72,19 @@ public class IndexCreateParserErrors_p { } dark = true; DigestURL url; - byte[] initiatorHash, executorHash; - Seed initiatorSeed, executorSeed; int j=0; - ArrayList l = sb.crawlQueues.errorURL.list(showRejectedCount); - ZURL.Entry entry; + ArrayList l = sb.crawlQueues.errorURL.list(showRejectedCount); + CollectionConfiguration.FailDoc entry; for (int i = l.size() - 1; i >= 0; i--) { entry = l.get(i); if (entry == null) continue; - url = entry.url(); + url = entry.getDigestURL(); if (url == null) continue; - - initiatorHash = entry.initiator(); - executorHash = entry.executor(); - initiatorSeed = (initiatorHash == null) ? null : sb.peers.getConnected(ASCII.String(initiatorHash)); - executorSeed = (executorHash == null) ? null : sb.peers.getConnected(ASCII.String(executorHash)); - prop.putHTML("rejected_list_"+j+"_time", GenericFormatter.SIMPLE_FORMATTER.format(entry.workdate())); - prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName())); - prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? 
"proxy" : executorSeed.getName())); + + prop.putHTML("rejected_list_"+j+"_time", GenericFormatter.SIMPLE_FORMATTER.format(new Date())); prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false)); - String cause = entry.anycause(); + String cause = entry.getFailReason(); if (cause.startsWith(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER)) { prop.put("rejected_list_"+j+"_failreason", "(test) " + cause); diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index 159b23cdf..3684117b3 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -32,6 +32,7 @@ import java.net.MalformedURLException; import java.util.Date; +import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -127,7 +128,7 @@ public class QuickCrawlLink_p { final byte[] urlhash = crawlingStartURL.hash(); indexSegment.fulltext().remove(urlhash); sb.crawlQueues.noticeURL.removeByURLHash(urlhash); - sb.crawlQueues.errorURL.remove(urlhash); + sb.crawlQueues.errorURL.remove(ASCII.String(urlhash)); // create crawling profile CrawlProfile pe = null; diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index a3a3318e6..ae8cd6050 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -30,11 +30,11 @@ import java.io.IOException; import net.yacy.cora.document.encoding.ASCII; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.ResultURLs; import net.yacy.crawler.data.ResultURLs.EventOrigin; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; @@ -161,14 +161,7 @@ public final class crawlReceipt { } sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case - sb.crawlQueues.errorURL.push( - entry.toBalancerEntry(iam), - null, - youare.getBytes(), - null, - 0, - FailCategory.FINAL_LOAD_CONTEXT, - result + ":" + reason, -1); + sb.crawlQueues.errorURL.push(entry.url(), null, FailCategory.FINAL_LOAD_CONTEXT, result + ":" + reason, -1); //switchboard.noticeURL.remove(receivedUrlhash); prop.put("delay", "3600"); return prop; diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 77316289d..13f40f22b 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -246,7 +246,7 @@ public final class search { false, indexSegment, rankingProfile, - header.get(RequestHeader.USER_AGENT, ""), + header.get(HeaderFramework.USER_AGENT, ""), false, false, 0.0d, @@ -310,7 +310,7 @@ public final class search { false, sb.index, rankingProfile, - header.get(RequestHeader.USER_AGENT, ""), + header.get(HeaderFramework.USER_AGENT, ""), false, false, 0.0d, diff --git a/htroot/yacy/urls.java b/htroot/yacy/urls.java index f14e71225..9bb930116 100644 --- a/htroot/yacy/urls.java +++ b/htroot/yacy/urls.java @@ -25,14 +25,11 @@ // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA import java.io.IOException; -import java.util.Date; - import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.protocol.RequestHeader; import net.yacy.crawler.data.NoticedURL; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.retrieval.Request; import 
net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.peers.Protocol; @@ -80,15 +77,7 @@ public class urls { referrer = sb.getURL(entry.referrerhash()); // place url to notice-url db - sb.crawlQueues.delegatedURL.push( - entry, - null, - sb.peers.mySeed().hash.getBytes(), - new Date(), - 0, - FailCategory.FINAL_PROCESS_CONTEXT, - "client=____________", - -1); + sb.crawlQueues.delegatedURL.put(ASCII.String(entry.url().hash()), entry.url()); // create RSS entry prop.put("item_" + c + "_title", ""); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 6e028f53e..cd9e38ff3 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -663,7 +663,7 @@ public class yacysearch { authenticated, indexSegment, ranking, - header.get(RequestHeader.USER_AGENT, ""), + header.get(HeaderFramework.USER_AGENT, ""), sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false) && sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && sb.peers.mySeed().getFlagAcceptRemoteIndex(), diff --git a/source/net/yacy/cora/federate/solr/FailCategory.java b/source/net/yacy/cora/federate/solr/FailCategory.java new file mode 100644 index 000000000..cad47f461 --- /dev/null +++ b/source/net/yacy/cora/federate/solr/FailCategory.java @@ -0,0 +1,39 @@ +/** + * FailCategory + * Copyright 2013 by Michael Peter Christen + * First released 17.10.2013 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.cora.federate.solr; + +public enum FailCategory { + // TEMPORARY categories are failure cases that should be tried again + // FINAL categories are failure cases that are final and should not be tried again + TEMPORARY_NETWORK_FAILURE(true, FailType.fail), // an entity could not be loaded + FINAL_PROCESS_CONTEXT(false, FailType.excl), // because of a processing context we do not want that url again (i.e.
remote crawling) + FINAL_LOAD_CONTEXT(false, FailType.excl), // the crawler configuration does not want to load the entity + FINAL_ROBOTS_RULE(true, FailType.excl), // a remote server denies indexing or loading + FINAL_REDIRECT_RULE(true, FailType.excl); // the remote server redirects this page, thus disallowing reading of content + + public final boolean store; + public final FailType failType; + + private FailCategory(boolean store, FailType failType) { + this.store = store; + this.failType = failType; + } +} diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index 953619fda..c6c903f0c 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -41,6 +41,7 @@ import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.ftp.FTPClient; @@ -49,9 +50,7 @@ import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.CrawlQueues; import net.yacy.crawler.data.NoticedURL; import net.yacy.crawler.data.ResultURLs; -import net.yacy.crawler.data.ZURL; import net.yacy.crawler.data.ResultURLs.EventOrigin; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.retrieval.FTPLoader; import net.yacy.crawler.retrieval.HTTPLoader; import net.yacy.crawler.retrieval.Request; @@ -65,6 +64,7 @@ import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.FilterEngine; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; +import net.yacy.search.schema.CollectionConfiguration; public final class CrawlStacker { @@ -75,7 +75,7 @@ public final class CrawlStacker { private final ConcurrentLog log = new ConcurrentLog("STACKCRAWL"); private final RobotsTxt robots; private final WorkflowProcessor requestQueue; - private final CrawlQueues nextQueue; + public final CrawlQueues nextQueue; private final CrawlSwitchboard crawler; private final Segment indexSegment; private final SeedDB peers; @@ -151,7 +151,7 @@ public final class CrawlStacker { // if the url was rejected we store it into the error URL db if (rejectReason != null && !rejectReason.startsWith("double in")) { final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle())); - this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1); + this.nextQueue.errorURL.push(entry.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1); } } catch (final Exception e) { CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e); @@ -186,7 +186,7 @@ public final class CrawlStacker { this.indexSegment.fulltext().remove(urlhash); byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6); List hosthashes = new ArrayList(); hosthashes.add(hosthash); - this.nextQueue.errorURL.removeHosts(hosthashes, false); + this.nextQueue.errorURL.removeHosts(hosthashes); this.nextQueue.removeURL(urlhash); String u = url.toNormalform(true); if (u.endsWith("/")) { @@ -198,7 +198,7 @@ public final class CrawlStacker { final byte[] uh = new DigestURL(u).hash(); this.indexSegment.fulltext().remove(uh); 
this.nextQueue.noticeURL.removeByURLHash(uh); - this.nextQueue.errorURL.remove(uh); + this.nextQueue.errorURL.remove(ASCII.String(uh)); } catch (final MalformedURLException e1) {} } @@ -246,7 +246,7 @@ public final class CrawlStacker { if (replace) { CrawlStacker.this.indexSegment.fulltext().remove(urlhash); cq.noticeURL.removeByURLHash(urlhash); - cq.errorURL.remove(urlhash); + cq.errorURL.remove(ASCII.String(urlhash)); } // put entry on crawl stack @@ -425,8 +425,8 @@ public final class CrawlStacker { if (dbocc != null) { // do double-check if (dbocc == HarvestProcess.ERRORS) { - final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash()); - return "double in: errors (" + errorEntry.anycause() + ")"; + final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash())); + return "double in: errors (" + errorEntry.getFailReason() + ")"; } return "double in: " + dbocc.toString(); } @@ -441,9 +441,9 @@ public final class CrawlStacker { return "double in: LURL-DB, oldDate = " + oldDate.toString(); } if (dbocc == HarvestProcess.ERRORS) { - final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash()); - if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.anycause()); - return "double in: errors (" + errorEntry.anycause() + "), oldDate = " + oldDate.toString(); + final CollectionConfiguration.FailDoc errorEntry = this.nextQueue.errorURL.get(ASCII.String(url.hash())); + if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "', previous cause: " + errorEntry.getFailReason()); + return "double in: errors (" + errorEntry.getFailReason() + "), oldDate = " + oldDate.toString(); } if (this.log.isInfo()) this.log.info("URL '" + urlstring + "' is double registered in '" + dbocc.toString() + "'. 
"); return "double in: " + dbocc.toString() + ", oldDate = " + oldDate.toString(); diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 04ecfb924..fc355d1ba 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -40,17 +40,16 @@ import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.feed.Hit; import net.yacy.cora.document.feed.RSSFeed; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.order.Base64Order; import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.HarvestProcess; import net.yacy.crawler.data.NoticedURL.StackType; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Response; import net.yacy.crawler.robots.RobotsTxtEntry; -import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.workflow.WorkflowJob; import net.yacy.peers.DHTSelection; import net.yacy.peers.Protocol; @@ -59,19 +58,19 @@ import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.IndexingQueueEntry; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.index.ErrorCache; +import net.yacy.search.schema.CollectionConfiguration; public class CrawlQueues { - private static final String ERROR_DB_FILENAME = "urlError4.db"; - private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db"; - private Switchboard sb; private ConcurrentLog log; private Map workers; // mapping from url hash to Worker thread object private final ArrayList remoteCrawlProviderHashes; public NoticedURL noticeURL; - public ZURL errorURL, delegatedURL; + public ErrorCache errorURL; + public Map delegatedURL; public CrawlQueues(final Switchboard sb, final File queuePath) { this.sb = sb; @@ -82,10 +81,8 @@ public class CrawlQueues { // start crawling management this.log.config("Starting Crawling Management"); this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727); - FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(sb.index.fulltext(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727); - this.delegatedURL = new ZURL(sb.index.fulltext(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727); - try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time + this.errorURL = new ErrorCache(sb.index.fulltext()); + this.delegatedURL = new ConcurrentHashMap(); } public void relocate(final File newQueuePath) { @@ -95,10 +92,8 @@ public class CrawlQueues { this.remoteCrawlProviderHashes.clear(); this.noticeURL = new NoticedURL(newQueuePath, this.sb.useTailCache, this.sb.exceed134217727); - FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME)); - this.errorURL = new ZURL(this.sb.index.fulltext(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727); - this.delegatedURL = new ZURL(this.sb.index.fulltext(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727); - try {this.errorURL.clear();} catch (IOException e) {} // start with empty errors each time + this.errorURL = new ErrorCache(this.sb.index.fulltext()); + this.delegatedURL = new ConcurrentHashMap(); } public synchronized void 
close() { @@ -114,8 +109,7 @@ public class CrawlQueues { } } this.noticeURL.close(); - this.errorURL.close(); - this.delegatedURL.close(); + this.delegatedURL.clear(); } public void clear() { @@ -130,11 +124,7 @@ public class CrawlQueues { } catch (final IOException e) { ConcurrentLog.logException(e); } - try { - this.delegatedURL.clear(); - } catch (final IOException e) { - ConcurrentLog.logException(e); - } + this.delegatedURL.clear(); } /** @@ -143,7 +133,7 @@ public class CrawlQueues { * @return if the hash exists, the name of the database is returned, otherwise null is returned */ public HarvestProcess exists(final byte[] hash) { - if (this.delegatedURL.exists(hash)) { + if (this.delegatedURL.containsKey(ASCII.String(hash))) { return HarvestProcess.DELEGATED; } if (this.errorURL.exists(hash)) { @@ -164,7 +154,7 @@ public class CrawlQueues { assert hash != null && hash.length == 12; this.noticeURL.removeByURLHash(hash); this.delegatedURL.remove(hash); - this.errorURL.remove(hash); + this.errorURL.remove(ASCII.String(hash)); } public DigestURL getURL(final byte[] urlhash) { @@ -172,13 +162,13 @@ public class CrawlQueues { if (urlhash == null || urlhash.length == 0) { return null; } - ZURL.Entry ee = this.delegatedURL.get(urlhash); - if (ee != null) { - return ee.url(); + DigestURL u = this.delegatedURL.get(ASCII.String(urlhash)); + if (u != null) { + return u; } - ee = this.errorURL.get(urlhash); + CollectionConfiguration.FailDoc ee = this.errorURL.get(ASCII.String(urlhash)); if (ee != null) { - return ee.url(); + return ee.getDigestURL(); } for (final Loader w: this.workers.values()) { if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) { @@ -639,14 +629,7 @@ public class CrawlQueues { (robotsEntry = CrawlQueues.this.sb.robots.getEntry(this.request.url(), this.profile.getAgent())) != null && robotsEntry.isDisallowed(this.request.url())) { //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt."); - CrawlQueues.this.errorURL.push( - this.request, - profile, - ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash), - new Date(), - 1, - FailCategory.FINAL_ROBOTS_RULE, - "denied by robots.txt", -1); + CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.FINAL_ROBOTS_RULE, "denied by robots.txt", -1); this.request.setStatus("worker-disallowed", WorkflowJob.STATUS_FINISHED); } else { // starting a load from the internet @@ -679,28 +662,14 @@ public class CrawlQueues { } if (result != null) { - CrawlQueues.this.errorURL.push( - this.request, - profile, - ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash), - new Date(), - 1, - FailCategory.TEMPORARY_NETWORK_FAILURE, - "cannot load: " + result, -1); + CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "cannot load: " + result, -1); this.request.setStatus("worker-error", WorkflowJob.STATUS_FINISHED); } else { this.request.setStatus("worker-processed", WorkflowJob.STATUS_FINISHED); } } } catch (final Exception e) { - CrawlQueues.this.errorURL.push( - this.request, - profile, - ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash), - new Date(), - 1, - FailCategory.TEMPORARY_NETWORK_FAILURE, - e.getMessage() + " - in worker", -1); + CrawlQueues.this.errorURL.push(this.request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, e.getMessage() + " - in worker", -1); ConcurrentLog.logException(e); this.request.setStatus("worker-exception", WorkflowJob.STATUS_FINISHED); } finally { diff --git 
a/source/net/yacy/crawler/data/ZURL.java b/source/net/yacy/crawler/data/ZURL.java deleted file mode 100644 index af0c47a4c..000000000 --- a/source/net/yacy/crawler/data/ZURL.java +++ /dev/null @@ -1,365 +0,0 @@ -// plasmaCrawlZURL.java -// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 15.03.2007 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.crawler.data; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.Iterator; -import java.util.List; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingQueue; - -import org.apache.solr.common.SolrInputDocument; - -import net.yacy.cora.document.encoding.UTF8; -import net.yacy.cora.document.id.DigestURL; -import net.yacy.cora.federate.solr.FailType; -import net.yacy.cora.order.Base64Order; -import net.yacy.cora.order.NaturalOrder; -import net.yacy.cora.util.ConcurrentLog; -import net.yacy.cora.util.SpaceExceededException; -import net.yacy.crawler.retrieval.Request; -import net.yacy.kelondro.data.word.Word; -import net.yacy.kelondro.index.Index; -import net.yacy.kelondro.index.Row; -import net.yacy.kelondro.table.SplitTable; -import net.yacy.kelondro.table.Table; -import net.yacy.kelondro.util.FileUtils; -import net.yacy.search.index.Fulltext; - -public class ZURL implements Iterable { - - private static ConcurrentLog log = new ConcurrentLog("REJECTED"); - - private static final int EcoFSBufferSize = 2000; - private static final int maxStackSize = 1000; - - public enum FailCategory { - // TEMPORARY categories are such failure cases that should be tried again - // FINAL categories are such failure cases that are final and should not be tried again - TEMPORARY_NETWORK_FAILURE(true, FailType.fail), // an entity could not been loaded - FINAL_PROCESS_CONTEXT(false, FailType.excl), // because of a processing context we do not want that url again (i.e. 
remote crawling) - FINAL_LOAD_CONTEXT(false, FailType.excl), // the crawler configuration does not want to load the entity - FINAL_ROBOTS_RULE(true, FailType.excl), // a remote server denies indexing or loading - FINAL_REDIRECT_RULE(true, FailType.excl); // the remote server redirects this page, thus disallowing reading of content - - public final boolean store; - public final FailType failType; - - private FailCategory(boolean store, FailType failType) { - this.store = store; - this.failType = failType; - } - } - - private final static Row rowdef = new Row( - "String urlhash-" + Word.commonHashLength + ", " + // the url's hash - "String executor-" + Word.commonHashLength + ", " + // the crawling executor - "Cardinal workdate-8 {b256}, " + // the time when the url was last time tried to load - "Cardinal workcount-4 {b256}, " + // number of load retries - "String anycause-132, " + // string describing load failure - "byte[] entry-" + Request.rowdef.objectsize, // extra space - Base64Order.enhancedCoder - ); - - // the class object - private Index urlIndex; - private final Queue stack; - private final Fulltext fulltext; - - protected ZURL( - final Fulltext fulltext, - final File cachePath, - final String tablename, - final boolean startWithEmptyFile, - final boolean useTailCache, - final boolean exceed134217727) { - this.fulltext = fulltext; - // creates a new ZURL in a file - cachePath.mkdirs(); - final File f = new File(cachePath, tablename); - if (startWithEmptyFile) { - if (f.exists()) { - if (f.isDirectory()) SplitTable.delete(cachePath, tablename); else FileUtils.deletedelete(f); - } - } - try { - this.urlIndex = new Table(f, rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727, true); - } catch (final SpaceExceededException e) { - try { - this.urlIndex = new Table(f, rowdef, 0, 0, false, exceed134217727, true); - } catch (final SpaceExceededException e1) { - ConcurrentLog.logException(e1); - } - } - //urlIndex = new kelondroFlexTable(cachePath, tablename, -1, rowdef, 0, true); - this.stack = new LinkedBlockingQueue(); - } - - protected void clear() throws IOException { - if (this.urlIndex != null) this.urlIndex.clear(); - if (this.stack != null) this.stack.clear(); - } - - protected void close() { - try {clear();} catch (final IOException e) {} - if (this.urlIndex != null) this.urlIndex.close(); - } - - public boolean remove(final byte[] hash) { - if (hash == null) return false; - //System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " remove " + hash); - try { - Iterator i = ZURL.this.stack.iterator(); - while (i.hasNext()) { - byte[] b = i.next(); - if (NaturalOrder.naturalOrder.equal(hash, b)) i.remove(); - } - return this.urlIndex.delete(hash); - } catch (final IOException e) { - return false; - } - } - - public void removeHosts(final Iterable hosthashes, final boolean concurrent) { - if (hosthashes == null) return; - Thread t = new Thread() { - public void run() { - try { - Iterator i = ZURL.this.urlIndex.keys(true, null); - List r = new ArrayList(); - while (i.hasNext()) { - byte[] b = i.next(); - for (byte[] hosthash: hosthashes) { - if (NaturalOrder.naturalOrder.equal(hosthash, 0, b, 6, 6)) r.add(b); - } - } - for (byte[] b: r) ZURL.this.urlIndex.remove(b); - i = ZURL.this.stack.iterator(); - while (i.hasNext()) { - byte[] b = i.next(); - for (byte[] hosthash: hosthashes) { - if (NaturalOrder.naturalOrder.equal(hosthash, 0, b, 6, 6)) i.remove(); - } - } - } catch (final IOException e) {} - } - }; - if (concurrent) t.start(); else t.run(); - } - - public void 
push( - final Request bentry, - final CrawlProfile profile, - final byte[] executor, - final Date workdate, - final int workcount, - final FailCategory failCategory, - String anycause, - final int httpcode) { - // assert executor != null; // null == proxy ! - assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name(); - if (exists(bentry.url().hash())) return; // don't insert double causes - if (anycause == null) anycause = "unknown"; - final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : ""); - final Entry entry = new Entry(bentry, executor, workdate, workcount, reason); - put(entry); - this.stack.add(entry.hash()); - if (!reason.startsWith("double")) log.info(bentry.url().toNormalform(true) + " - " + reason); - if (this.fulltext.getDefaultConnector() != null && failCategory.store) { - // send the error to solr - try { - SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode); - this.fulltext.getDefaultConnector().add(errorDoc); - } catch (final IOException e) { - ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage()); - } - } - while (this.stack.size() > maxStackSize) this.stack.poll(); - } - - @Override - public Iterator iterator() { - return new EntryIterator(); - } - - public ArrayList list(int max) { - final ArrayList l = new ArrayList(); - DigestURL url; - for (final ZURL.Entry entry: this) { - if (entry == null) continue; - url = entry.url(); - if (url == null) continue; - l.add(entry); - if (max-- <= 0) l.remove(0); - } - return l; - } - - private class EntryIterator implements Iterator { - private final Iterator hi; - public EntryIterator() { - this.hi = ZURL.this.stack.iterator(); - } - @Override - public boolean hasNext() { - return this.hi.hasNext(); - } - - @Override - public ZURL.Entry next() { - return get(this.hi.next()); - } - - @Override - public void remove() { - this.hi.remove(); - } - - } - - public ZURL.Entry get(final byte[] urlhash) { - try { - if (this.urlIndex == null) return null; - // System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " get " + urlhash); - final Row.Entry entry = this.urlIndex.get(urlhash, false); - if (entry == null) return null; - return new Entry(entry); - } catch (final IOException e) { - ConcurrentLog.logException(e); - return null; - } - } - - /** - * private put (use push instead) - * @param entry - */ - private void put(final Entry entry) { - // stores the values from the object variables into the database - if (entry.stored) return; - if (entry.bentry == null) return; - final Row.Entry newrow = rowdef.newEntry(); - newrow.setCol(0, entry.bentry.url().hash()); - newrow.setCol(1, entry.executor); - newrow.setCol(2, entry.workdate.getTime()); - newrow.setCol(3, entry.workcount); - newrow.setCol(4, UTF8.getBytes(entry.anycause)); - newrow.setCol(5, entry.bentry.toRow().bytes()); - try { - if (this.urlIndex != null) this.urlIndex.put(newrow); - entry.stored = true; - } catch (final Exception e) { - ConcurrentLog.logException(e); - } - } - - boolean exists(final byte[] urlHash) { - return this.urlIndex.has(urlHash); - } - - public void clearStack() { - this.stack.clear(); - } - - public int stackSize() { - return this.stack.size(); - } - - public class Entry { - - private Request bentry; // the balancer entry - private final byte[] executor; // the 
crawling executor - private final Date workdate; // the time when the url was last time tried to load - private final int workcount; // number of tryings - private final String anycause; // string describing reason for load fail - private boolean stored; - - private Entry( - final Request bentry, - final byte[] executor, - final Date workdate, - final int workcount, - final String anycause) { - // create new entry - assert bentry != null; - // assert executor != null; // null == proxy ! - this.bentry = bentry; - this.executor = executor; - this.workdate = (workdate == null) ? new Date() : workdate; - this.workcount = workcount; - this.anycause = (anycause == null) ? "" : anycause; - this.stored = false; - } - - private Entry(final Row.Entry entry) throws IOException { - assert (entry != null); - this.executor = entry.getColBytes(1, true); - this.workdate = new Date(entry.getColLong(2)); - this.workcount = (int) entry.getColLong(3); - this.anycause = entry.getColUTF8(4); - this.bentry = new Request(Request.rowdef.newEntry(entry.getColBytes(5, false))); - assert (Base64Order.enhancedCoder.equal(entry.getPrimaryKeyBytes(), this.bentry.url().hash())); - this.stored = true; - return; - } - - public DigestURL url() { - return this.bentry.url(); - } - - public byte[] initiator() { - return this.bentry.initiator(); - } - - private byte[] hash() { - // return a url-hash, based on the md5 algorithm - // the result is a String of 12 bytes within a 72-bit space - // (each byte has an 6-bit range) - // that should be enough for all web pages on the world - return this.bentry.url().hash(); - } - - public Date workdate() { - return this.workdate; - } - - public byte[] executor() { - // return the creator's hash - return this.executor; - } - - public String anycause() { - return this.anycause; - } - - } - -} - diff --git a/source/net/yacy/crawler/retrieval/FTPLoader.java b/source/net/yacy/crawler/retrieval/FTPLoader.java index 148853636..81bc12e68 100644 --- a/source/net/yacy/crawler/retrieval/FTPLoader.java +++ b/source/net/yacy/crawler/retrieval/FTPLoader.java @@ -36,6 +36,7 @@ import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -43,7 +44,6 @@ import net.yacy.cora.protocol.ftp.FTPClient; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.Latency; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.document.TextParser; import net.yacy.search.Switchboard; @@ -156,7 +156,7 @@ public class FTPLoader { if (berr.size() > 0 || response == null) { // some error logging final String detail = (berr.size() > 0) ? 
"Errorlog: " + berr.toString() : ""; - this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1); throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail); } diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index 2383cc128..5d9982be1 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -25,10 +25,9 @@ package net.yacy.crawler.retrieval; import java.io.IOException; -import java.util.Date; -import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; @@ -37,7 +36,6 @@ import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.Latency; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.kelondro.io.ByteCount; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; @@ -79,10 +77,8 @@ public final class HTTPLoader { private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException { - byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash); - if (retryCount < 0) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1); throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted."); } @@ -98,7 +94,7 @@ public final class HTTPLoader { // check if url is in blacklist final String hostlow = host.toLowerCase(); if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } @@ -145,7 +141,7 @@ public final class HTTPLoader { redirectionUrlString = redirectionUrlString == null ? 
"" : redirectionUrlString.trim(); if (redirectionUrlString.isEmpty()) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode); throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString); } @@ -159,13 +155,13 @@ public final class HTTPLoader { this.sb.webStructure.generateCitationReference(url, redirectionUrl); if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode); } if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) { // if we are already doing a shutdown we don't need to retry crawling if (Thread.currentThread().isInterrupted()) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown."); } @@ -174,11 +170,11 @@ public final class HTTPLoader { return load(request, profile, retryCount - 1, maxFileSize, blacklistType, agent); } // we don't want to follow redirects - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString); } else if (responseBody == null) { // no response, reject file - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode); throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString); } else if (statusCode == 200 || statusCode == 203) { // the transfer is ok @@ -189,7 +185,7 @@ public final class HTTPLoader { // check length again in case it was not possible to get the length before loading if (maxFileSize >= 0 && contentLength > maxFileSize) { - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode); throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. 
(GET)"); } @@ -206,7 +202,7 @@ public final class HTTPLoader { return response; } else { // if the response has not the right response type then reject file - this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode); + this.sb.crawlQueues.errorURL.push(request.url(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode); throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString); } } diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 6ad3273ef..7f7d59836 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -42,6 +42,7 @@ import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; @@ -50,7 +51,6 @@ import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.util.ConcurrentLog; import net.yacy.crawler.data.Cache; import net.yacy.crawler.data.CrawlProfile; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.retrieval.FTPLoader; import net.yacy.crawler.retrieval.FileLoader; import net.yacy.crawler.retrieval.HTTPLoader; @@ -191,7 +191,7 @@ public final class LoaderDispatcher { // check if url is in blacklist if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) { - this.sb.crawlQueues.errorURL.push(request, crawlProfile, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); + this.sb.crawlQueues.errorURL.push(request.url(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. 
URL is in blacklist."); } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index f2d7d5943..170d5ef30 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -97,6 +97,7 @@ import net.yacy.cora.document.feed.RSSReader; import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; +import net.yacy.cora.federate.solr.FailCategory; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.SchemaConfiguration; import net.yacy.cora.federate.solr.instance.RemoteInstance; @@ -127,7 +128,6 @@ import net.yacy.crawler.data.ResultImages; import net.yacy.crawler.data.ResultURLs; import net.yacy.crawler.data.NoticedURL.StackType; import net.yacy.crawler.data.ResultURLs.EventOrigin; -import net.yacy.crawler.data.ZURL.FailCategory; import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Response; import net.yacy.crawler.robots.RobotsTxt; @@ -1789,16 +1789,9 @@ public final class Switchboard extends serverSwitch { // in the noIndexReason is set, indexing is not allowed if ( noIndexReason != null ) { // log cause and close queue - final DigestURL referrerURL = response.referrerURL(); //if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason); - addURLtoErrorDB( - response.url(), - response.profile(), - (referrerURL == null) ? null : referrerURL.hash(), - response.initiator(), - response.name(), - FailCategory.FINAL_PROCESS_CONTEXT, - noIndexReason); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, noIndexReason, -1); // finish this entry return "not allowed: " + noIndexReason; } @@ -1991,7 +1984,7 @@ public final class Switchboard extends serverSwitch { public int cleanupJobSize() { int c = 1; // "es gibt immer was zu tun" - if ( (this.crawlQueues.delegatedURL.stackSize() > 1000) ) { + if ( (this.crawlQueues.delegatedURL.size() > 1000) ) { c++; } if ( (this.crawlQueues.errorURL.stackSize() > 1000) ) { @@ -2101,13 +2094,13 @@ public final class Switchboard extends serverSwitch { // clean up delegated stack checkInterruption(); - if ( (this.crawlQueues.delegatedURL.stackSize() > 1000) ) { + if ( (this.crawlQueues.delegatedURL.size() > 1000) ) { if ( this.log.isFine() ) { this.log.fine("Cleaning Delegated-URLs report stack, " - + this.crawlQueues.delegatedURL.stackSize() + + this.crawlQueues.delegatedURL.size() + " entries on stack"); } - this.crawlQueues.delegatedURL.clearStack(); + this.crawlQueues.delegatedURL.clear(); } // clean up error stack @@ -2428,7 +2421,6 @@ public final class Switchboard extends serverSwitch { public IndexingQueueEntry parseDocument(final IndexingQueueEntry in) { in.queueEntry.updateStatus(Response.QUEUE_STATE_PARSING); - Document[] documents = null; try { documents = parseDocument(in.queueEntry); @@ -2439,7 +2431,7 @@ public final class Switchboard extends serverSwitch { } if ( documents == null ) { return null; - } + } return new IndexingQueueEntry(in.queueEntry, documents, null); } @@ -2465,14 +2457,8 @@ public final class Switchboard extends serverSwitch { response.setContent(Cache.getContent(response.url().hash())); if ( response.getContent() == null ) { this.log.warn("the resource '" + response.url() + "' is missing in the cache."); - addURLtoErrorDB( - response.url(), - response.profile(), - response.referrerHash(), 
- response.initiator(), - response.name(), - FailCategory.FINAL_LOAD_CONTEXT, - "missing in cache"); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_LOAD_CONTEXT, "missing in cache", -1); return null; } } @@ -2490,20 +2476,37 @@ } } catch (final Parser.Failure e ) { this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage()); - addURLtoErrorDB( - response.url(), - response.profile(), - response.referrerHash(), - response.initiator(), - response.name(), - FailCategory.FINAL_PROCESS_CONTEXT, - e.getMessage()); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(response.url(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1); return null; } - final long parsingEndTime = System.currentTimeMillis(); + + // put anchors on crawl stack final long stackStartTime = System.currentTimeMillis(); + // check that the documents have wanted urls; this is not a workaround for a bug: it is possible that + // e.g. the result of a feed parsing produces documents from domains which shall be filtered out by the crawl profile + if (response.profile() != null) { + ArrayList<Document> newDocs = new ArrayList<Document>(); + for (Document doc: documents) { + String rejectReason = this.crawlStacker.checkAcceptance(doc.dc_source(), response.profile(), 1 /*depth is irrelevant here, we just make clear it is not the start url*/); + if (rejectReason == null) { + newDocs.add(doc); + } else { + // we record these as fail urls so that the problem can be tracked + if (!rejectReason.startsWith("double in")) { + final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(response.profile().handle())); + this.crawlStacker.nextQueue.errorURL.push(response.url(), profile, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1); + } + } + } + if (newDocs.size() != documents.length) { + documents = newDocs.toArray(new Document[newDocs.size()]); + } + } + + // collect anchors within remaining documents if ((processCase == EventOrigin.PROXY_LOAD || processCase == EventOrigin.LOCAL_CRAWLING) && ( response.profile() == null || @@ -2592,14 +2595,8 @@ if (!(profile.indexUrlMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN || profile.indexUrlMustMatchPattern().matcher(urls).matches()) || (profile.indexUrlMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.indexUrlMustNotMatchPattern().matcher(urls).matches())) { if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern()); - addURLtoErrorDB( - in.queueEntry.url(), - profile, - in.queueEntry.referrerHash(), - in.queueEntry.initiator(), - in.queueEntry.name(), - FailCategory.FINAL_PROCESS_CONTEXT, - "indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern()); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " +
profile.indexUrlMustNotMatchPattern().pattern(), -1); return new IndexingQueueEntry(in.queueEntry, in.documents, null); } @@ -2608,27 +2605,15 @@ public final class Switchboard extends serverSwitch { docloop: for (final Document document : in.documents) { if (document.indexingDenied() && profile.obeyHtmlRobotsNoindex()) { if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule"); - addURLtoErrorDB( - in.queueEntry.url(), - profile, - in.queueEntry.referrerHash(), - in.queueEntry.initiator(), - in.queueEntry.name(), - FailCategory.FINAL_PROCESS_CONTEXT, - "denied by document-attached noindexing rule"); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by document-attached noindexing rule", -1); continue docloop; } if (!(profile.indexContentMustMatchPattern() == CrawlProfile.MATCH_ALL_PATTERN || profile.indexContentMustMatchPattern().matcher(document.getTextString()).matches()) || (profile.indexContentMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.indexContentMustNotMatchPattern().matcher(document.getTextString()).matches())) { if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern()); - addURLtoErrorDB( - in.queueEntry.url(), - profile, - in.queueEntry.referrerHash(), - in.queueEntry.initiator(), - in.queueEntry.name(), - FailCategory.FINAL_PROCESS_CONTEXT, - "indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern()); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(in.queueEntry.url(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern(), -1); continue docloop; } doclist.add(document); @@ -2705,30 +2690,18 @@ public final class Switchboard extends serverSwitch { if (condenser == null || (document.indexingDenied() && profile.obeyHtmlRobotsNoindex())) { //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase); - addURLtoErrorDB( - url, - profile, - (referrerURL == null) ? null : referrerURL.hash(), - queueEntry.initiator(), - dc_title, - FailCategory.FINAL_PROCESS_CONTEXT, - "denied by rule in document, process case=" + processCase); + // create a new errorURL DB entry + this.crawlQueues.errorURL.push(url, profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by rule in document, process case=" + processCase, -1); return; } if ( profile != null && !profile.indexText() && !profile.indexMedia() ) { //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name()); - addURLtoErrorDB( - url, - profile, - (referrerURL == null) ? 
@@ -2705,30 +2690,18 @@ public final class Switchboard extends serverSwitch {
         if (condenser == null || (document.indexingDenied() && profile.obeyHtmlRobotsNoindex())) {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
-            addURLtoErrorDB(
-                url,
-                profile,
-                (referrerURL == null) ? null : referrerURL.hash(),
-                queueEntry.initiator(),
-                dc_title,
-                FailCategory.FINAL_PROCESS_CONTEXT,
-                "denied by rule in document, process case=" + processCase);
+            // create a new errorURL DB entry
+            this.crawlQueues.errorURL.push(url, profile, FailCategory.FINAL_PROCESS_CONTEXT, "denied by rule in document, process case=" + processCase, -1);
             return;
         }

         if ( profile != null && !profile.indexText() && !profile.indexMedia() ) {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
-            addURLtoErrorDB(
-                url,
-                profile,
-                (referrerURL == null) ? null : referrerURL.hash(),
-                queueEntry.initiator(),
-                dc_title,
-                FailCategory.FINAL_LOAD_CONTEXT,
-                "denied by profile rule, process case="
-                    + processCase
-                    + ", profile name = "
-                    + profile.collectionName());
+            // create a new errorURL DB entry
+            this.crawlQueues.errorURL.push(url, profile, FailCategory.FINAL_LOAD_CONTEXT, "denied by profile rule, process case="
+                + processCase
+                + ", profile name = "
+                + profile.collectionName(), -1);
             return;
         }
@@ -2906,7 +2879,7 @@ public final class Switchboard extends serverSwitch {
         // remove the document from the error-db
         byte[] hosthash = new byte[6]; System.arraycopy(urlhash, 6, hosthash, 0, 6);
         List<byte[]> hosthashes = new ArrayList<byte[]>(); hosthashes.add(hosthash);
-        this.crawlQueues.errorURL.removeHosts(hosthashes, false);
+        this.crawlQueues.errorURL.removeHosts(hosthashes);
         this.crawlQueues.removeURL(urlhash);

         // get a scraper to get the title
@@ -3373,31 +3346,6 @@ public final class Switchboard extends serverSwitch {
         return hasDoneSomething;
     }

-    private void addURLtoErrorDB(
-        final DigestURL url,
-        final CrawlProfile profile,
-        final byte[] referrerHash,
-        final byte[] initiator,
-        final String name,
-        final FailCategory failCategory,
-        final String failreason) {
-        // assert initiator != null; // null == proxy
-        // create a new errorURL DB entry
-        final Request bentry =
-            new Request(
-                initiator,
-                url,
-                referrerHash,
-                (name == null) ? "" : name,
-                new Date(),
-                null,
-                0,
-                0,
-                0,
-                0);
-        this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
-    }
-
     public final void heuristicSite(final SearchEvent searchEvent, final String host) {
         new Thread() {
             @Override
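
With the hunks above, the private addURLtoErrorDB() wrapper is gone and every call site reports failures directly through errorURL.push(url, profile, failCategory, reason, httpcode). The removeHosts() call also exposes the hash layout the new ErrorCache relies on: a YaCy URL hash is 12 bytes and its last 6 bytes are the host hash. A small sketch of that extraction, with a made-up hash value:

    import java.util.ArrayList;
    import java.util.List;

    public class HostHashSketch {
        public static void main(String[] args) {
            byte[] urlhash = "AAAAAAbbbbbb".getBytes(); // illustrative 12-byte hash
            byte[] hosthash = new byte[6];
            System.arraycopy(urlhash, 6, hosthash, 0, 6); // same copy as in the patch
            List<byte[]> hosthashes = new ArrayList<byte[]>();
            hosthashes.add(hosthash);
            System.out.println(new String(hosthash)); // bbbbbb
        }
    }
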
diff --git a/source/net/yacy/search/index/ErrorCache.java b/source/net/yacy/search/index/ErrorCache.java
new file mode 100644
index 000000000..e0ac6c42d
--- /dev/null
+++ b/source/net/yacy/search/index/ErrorCache.java
@@ -0,0 +1,173 @@
+/**
+ *  ErrorCache
+ *  Copyright 2013 by Michael Peter Christen
+ *  First released 17.10.2013 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrQuery.SortClause;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+
+import net.yacy.cora.document.encoding.ASCII;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
+import net.yacy.cora.order.NaturalOrder;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.crawler.data.CrawlProfile;
+import net.yacy.search.schema.CollectionConfiguration;
+import net.yacy.search.schema.CollectionSchema;
+
+public class ErrorCache {
+
+    private static ConcurrentLog log = new ConcurrentLog("REJECTED");
+    private static final int maxStackSize = 1000;
+
+    // the class object
+    private final LinkedHashMap<String, CollectionConfiguration.FailDoc> stack;
+    private final Fulltext fulltext;
+
+    public ErrorCache(final Fulltext fulltext) {
+        this.fulltext = fulltext;
+        this.stack = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
+        try {
+            // fill stack with latest values
+            final SolrQuery params = new SolrQuery();
+            params.setParam("defType", "edismax");
+            params.setStart(0);
+            params.setRows(100);
+            params.setFacet(false);
+            params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
+            params.setQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
+            QueryResponse rsp = fulltext.getDefaultConnector().getResponseByParams(params);
+            SolrDocumentList docList = rsp == null ? null : rsp.getResults();
+            if (docList != null) for (int i = docList.size() - 1; i >= 0; i--) {
+                CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(docList.get(i));
+                this.stack.put(ASCII.String(failDoc.getDigestURL().hash()), failDoc);
+            }
+        } catch (final Throwable e) {
+            // a failure to warm up the stack is not fatal; start with an empty stack
+        }
+    }
+
+    public void clear() throws IOException {
+        if (this.stack != null) this.stack.clear();
+        this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
+    }
+
+    public void remove(final String hash) {
+        if (hash == null) return;
+        this.stack.remove(hash);
+        try {
+            this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + hash + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
+        } catch (final IOException e) {
+            return;
+        }
+    }
+
+    public void removeHosts(final Iterable<byte[]> hosthashes) {
+        if (hosthashes == null) return;
+        try {
+            for (byte[] hosthash : hosthashes) {
+                this.fulltext.getDefaultConnector().deleteByQuery(CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
+            }
+            Iterator<String> i = this.stack.keySet().iterator();
+            while (i.hasNext()) {
+                String b = i.next();
+                // the host hash is the 6-byte suffix of the 12-byte url hash key
+                for (byte[] hosthash : hosthashes) {
+                    if (NaturalOrder.naturalOrder.equal(hosthash, 0, ASCII.getBytes(b), 6, 6)) { i.remove(); break; }
+                }
+            }
+        } catch (final IOException e) {
+        }
+    }
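
Two details of the class body above deserve a note. The constructor warms the in-memory stack with the 100 most recently modified Solr documents that carry a failreason_s value, and removeHosts() purges both the Solr core (per-host delete-by-query) and the stack (suffix comparison on the url hash). A sketch of the warm-up query as plain SolrJ, with the field names hard-coded as they appear in the patch; building the query contacts no server:

    import org.apache.solr.client.solrj.SolrQuery;

    public class WarmupQuerySketch {
        public static void main(String[] args) {
            SolrQuery params = new SolrQuery();
            params.setQuery("failreason_s:[* TO *]"); // only error documents
            params.setStart(0);
            params.setRows(100);
            params.setSort(new SolrQuery.SortClause("last_modified", SolrQuery.ORDER.desc));
            System.out.println(params.toString());
        }
    }
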
+    public void push(final DigestURL url, final CrawlProfile profile, final FailCategory failCategory, String anycause, final int httpcode) {
+        // assert executor != null; // null == proxy !
+        assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name();
+        if (exists(url.hash()))
+            return; // don't insert double causes
+        if (anycause == null) anycause = "unknown";
+        final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
+        if (!reason.startsWith("double")) log.info(url.toNormalform(true) + " - " + reason);
+        CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(
+                url, profile == null ? null : profile.collections(),
+                failCategory.name() + " " + reason, failCategory.failType,
+                httpcode);
+        this.stack.put(ASCII.String(url.hash()), failDoc);
+        if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
+            // send the error to solr
+            try {
+                SolrInputDocument errorDoc = failDoc.toSolr(this.fulltext.getDefaultConfiguration());
+                this.fulltext.getDefaultConnector().add(errorDoc);
+            } catch (final IOException e) {
+                ConcurrentLog.warn("SOLR", "failed to send error " + url.toNormalform(true) + " to solr: " + e.getMessage());
+            }
+        }
+        // trim the stack to its maximum size by evicting the oldest entries
+        while (this.stack.size() > maxStackSize)
+            this.stack.remove(this.stack.keySet().iterator().next());
+    }
+
+    public ArrayList<CollectionConfiguration.FailDoc> list(int max) {
+        final ArrayList<CollectionConfiguration.FailDoc> l = new ArrayList<CollectionConfiguration.FailDoc>();
+        Iterator<CollectionConfiguration.FailDoc> fdi = this.stack.values().iterator();
+        // skip the oldest entries so that at most max elements are returned
+        for (int i = 0; i < this.stack.size() - max; i++) fdi.next();
+        while (fdi.hasNext()) l.add(fdi.next());
+        return l;
+    }
+
+    public CollectionConfiguration.FailDoc get(final String urlhash) {
+        CollectionConfiguration.FailDoc fd = this.stack.get(urlhash);
+        if (fd != null) return fd;
+        try {
+            SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(urlhash);
+            if (doc == null) return null;
+            return new CollectionConfiguration.FailDoc(doc);
+        } catch (final IOException e) {
+            ConcurrentLog.logException(e);
+            return null;
+        }
+    }
+
+    public boolean exists(final byte[] urlHash) {
+        try {
+            return this.fulltext.getDefaultConnector().existsByQuery(CollectionSchema.id.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\" AND " + CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]");
+        } catch (final IOException e) {
+            return false;
+        }
+    }
+
+    public void clearStack() {
+        this.stack.clear();
+    }
+
+    public int stackSize() {
+        return this.stack.size();
+    }
+
+}
+
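
The eviction at the end of push() is worth isolating: LinkedHashMap keeps insertion order, so removing keySet().iterator().next() always discards the oldest entry once the stack exceeds maxStackSize. A standalone demonstration of that bounded-stack behavior, with the cap shrunk to 3 for readability:

    import java.util.LinkedHashMap;

    public class BoundedStackSketch {
        public static void main(String[] args) {
            final int maxStackSize = 3; // 1000 in ErrorCache
            LinkedHashMap<String, String> stack = new LinkedHashMap<String, String>();
            for (int i = 0; i < 5; i++) {
                stack.put("urlhash" + i, "reason" + i);
                // evict in insertion order once the cap is exceeded, as push() does
                while (stack.size() > maxStackSize) {
                    stack.remove(stack.keySet().iterator().next());
                }
            }
            System.out.println(stack.keySet()); // [urlhash2, urlhash3, urlhash4]
        }
    }
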
diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java
index 9c2db013d..36273649b 100644
--- a/source/net/yacy/search/schema/CollectionConfiguration.java
+++ b/source/net/yacy/search/schema/CollectionConfiguration.java
@@ -80,6 +80,7 @@ import net.yacy.kelondro.util.Bitfield;
 import net.yacy.search.index.Segment;
 import net.yacy.search.index.Segment.ReferenceReport;
 import net.yacy.search.index.Segment.ReferenceReportCache;
+import net.yacy.search.query.QueryParams;
 import net.yacy.search.schema.WebgraphConfiguration.Subgraph;

 import org.apache.solr.common.SolrDocument;
@@ -1195,34 +1196,73 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
         return il;
     }
     */
-
-    /**
-     * register an entry as error document
-     * @param digestURI
-     * @param failReason
-     * @param httpstatus
-     * @throws IOException
-     */
-    public SolrInputDocument err(final DigestURL digestURI, final Map<String, Pattern> collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
-        boolean allAttr = this.isEmpty();
-        assert allAttr || contains(CollectionSchema.failreason_s);
-
-        final SolrInputDocument doc = new SolrInputDocument();
-        String url = addURIAttributes(doc, allAttr, digestURI, Response.docType(digestURI));
-        if (allAttr || contains(CollectionSchema.load_date_dt)) add(doc, CollectionSchema.load_date_dt, new Date());
-
-        // fail reason and status
-        if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, failReason);
-        if (allAttr || contains(CollectionSchema.failtype_s)) add(doc, CollectionSchema.failtype_s, failType.name());
-        if (allAttr || contains(CollectionSchema.httpstatus_i)) add(doc, CollectionSchema.httpstatus_i, httpstatus);
-        if (allAttr || contains(CollectionSchema.collection_sxt) && collections != null && collections.size() > 0) {
-            List<String> cs = new ArrayList<String>();
-            for (Map.Entry<String, Pattern> e: collections.entrySet()) {
-                if (e.getValue().matcher(url).matches()) cs.add(e.getKey());
-            }
-            add(doc, CollectionSchema.collection_sxt, cs);
-        }
-        return doc;
-    }
+    public static class FailDoc {
+        DigestURL digestURL;
+        final Map<String, Pattern> collections;
+        final String failReason;
+        final FailType failType;
+        final int httpstatus;
+        final Date failtime;
+        public FailDoc(final DigestURL digestURL, final Map<String, Pattern> collections, final String failReason, final FailType failType, final int httpstatus) {
+            this.digestURL = digestURL;
+            this.collections = collections;
+            this.failReason = failReason;
+            this.failType = failType;
+            this.httpstatus = httpstatus;
+            this.failtime = new Date();
+        }
+        public FailDoc(final SolrDocument doc) {
+            try {
+                this.digestURL = new DigestURL((String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
+            } catch (final MalformedURLException e) {
+                this.digestURL = null;
+            }
+            this.collections = new HashMap<String, Pattern>();
+            Collection<Object> c = doc.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName());
+            if (c != null) for (Object cn: c) this.collections.put((String) cn, QueryParams.catchall_pattern);
+            this.failReason = (String) doc.getFieldValue(CollectionSchema.failreason_s.getSolrFieldName());
+            this.failType = FailType.valueOf((String) doc.getFieldValue(CollectionSchema.failtype_s.getSolrFieldName()));
+            this.httpstatus = (Integer) doc.getFieldValue(CollectionSchema.httpstatus_i.getSolrFieldName());
+            this.failtime = (Date) doc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
+        }
+        public DigestURL getDigestURL() {
+            return this.digestURL;
+        }
+        public Map<String, Pattern> getCollections() {
+            return this.collections;
+        }
+        public String getFailReason() {
+            return this.failReason;
+        }
+        public FailType getFailType() {
+            return this.failType;
+        }
+        public int getHttpstatus() {
+            return this.httpstatus;
+        }
+        public SolrInputDocument toSolr(final CollectionConfiguration configuration) {
+            boolean allAttr = configuration.isEmpty();
+            assert allAttr || configuration.contains(CollectionSchema.failreason_s);
+
+            final SolrInputDocument doc = new SolrInputDocument();
+            String url = configuration.addURIAttributes(doc, allAttr, this.getDigestURL(), Response.docType(this.getDigestURL()));
+            if (allAttr || configuration.contains(CollectionSchema.load_date_dt)) configuration.add(doc, CollectionSchema.load_date_dt, new Date());
+
+            // fail reason and status
+            if (allAttr || configuration.contains(CollectionSchema.failreason_s)) configuration.add(doc, CollectionSchema.failreason_s, this.getFailReason());
+            if (allAttr || configuration.contains(CollectionSchema.failtype_s)) configuration.add(doc, CollectionSchema.failtype_s, this.getFailType().name());
+            if (allAttr || configuration.contains(CollectionSchema.httpstatus_i)) configuration.add(doc, CollectionSchema.httpstatus_i, this.getHttpstatus());
+            if (allAttr || configuration.contains(CollectionSchema.collection_sxt) && this.getCollections() != null && this.getCollections().size() > 0) {
+                List<String> cs = new ArrayList<String>();
+                for (Map.Entry<String, Pattern> e: this.getCollections().entrySet()) {
+                    if (e.getValue().matcher(url).matches()) cs.add(e.getKey());
+                }
+                configuration.add(doc, CollectionSchema.collection_sxt, cs);
+            }
+            return doc;
+        }
+
+    }
 }
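
toSolr() above attaches a collection name only when that collection's pattern matches the failed URL, mirroring the matching loop of the removed err() method. The loop in isolation, with collection names and patterns invented for the example:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.regex.Pattern;

    public class CollectionMatchSketch {
        public static void main(String[] args) {
            Map<String, Pattern> collections = new HashMap<String, Pattern>();
            collections.put("user", Pattern.compile(".*")); // catch-all, like QueryParams.catchall_pattern
            collections.put("intranet", Pattern.compile("http://internal\\..*"));
            String url = "http://www.example.org/page";
            List<String> cs = new ArrayList<String>();
            for (Map.Entry<String, Pattern> e : collections.entrySet()) {
                if (e.getValue().matcher(url).matches()) cs.add(e.getKey());
            }
            System.out.println(cs); // [user]
        }
    }
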
diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java
index 0be262a08..cf4dd21a8 100644
--- a/source/net/yacy/search/snippet/MediaSnippet.java
+++ b/source/net/yacy/search/snippet/MediaSnippet.java
@@ -40,6 +40,7 @@ import net.yacy.cora.document.analysis.Classification.ContentDomain;
 import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.AnchorURL;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.federate.solr.FailCategory;
 import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.protocol.ClientIdentification;
@@ -48,8 +49,6 @@ import net.yacy.cora.util.ByteArray;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.NumberTools;
 import net.yacy.cora.util.SpaceExceededException;
-import net.yacy.crawler.data.ZURL.FailCategory;
-import net.yacy.crawler.retrieval.Request;
 import net.yacy.document.Document;
 import net.yacy.document.Parser;
 import net.yacy.document.WordTokenizer;
@@ -59,6 +58,7 @@ import net.yacy.repository.Blacklist.BlacklistType;
 import net.yacy.search.Switchboard;

+@SuppressWarnings("unused")
 public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
     public ContentDomain type;
     public DigestURL href, source;
@@ -260,7 +260,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator