Repository: https://github.com/yacy/yacy_search_server.git
Commit 89c0aa0e74 (parent 0df5195cb0)

added collection_sxt to error documents

In effect, the CrawlProfile of a failed URL is now passed through the whole error-reporting path so that the profile's collection names end up in the collection_sxt field of the Solr error documents.

@@ -444,6 +444,7 @@ public class Crawler_p {
                     0,
                     0,
                     0),
+                null,
                 sb.peers.mySeed().hash.getBytes(),
                 new Date(),
                 1,

@@ -163,6 +163,7 @@ public final class crawlReceipt {
         sb.crawlQueues.delegatedURL.remove(entry.hash()); // the delegated work is transformed into an error case
         sb.crawlQueues.errorURL.push(
                 entry.toBalancerEntry(iam),
+                null,
                 youare.getBytes(),
                 null,
                 0,

@@ -82,6 +82,7 @@ public class urls {
         // place url to notice-url db
         sb.crawlQueues.delegatedURL.push(
                 entry,
+                null,
                 sb.peers.mySeed().hash.getBytes(),
                 new Date(),
                 0,

@@ -149,7 +149,8 @@ public final class CrawlStacker {
 
             // if the url was rejected we store it into the error URL db
             if (rejectReason != null) {
-                this.nextQueue.errorURL.push(entry, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
+                final CrawlProfile profile = this.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
+                this.nextQueue.errorURL.push(entry, profile, ASCII.getBytes(this.peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, rejectReason, -1);
             }
         } catch (final Exception e) {
             CrawlStacker.this.log.warn("Error while processing stackCrawl entry.\n" + "Entry: " + entry.toString() + "Error: " + e.toString(), e);

@@ -612,6 +612,7 @@ public class CrawlQueues {
         private Request request;
         private final Integer code;
         private final long start;
+        private final CrawlProfile profile;
 
         private Loader(final Request entry) {
             this.start = System.currentTimeMillis();
@@ -619,6 +620,7 @@ public class CrawlQueues {
             this.request.setStatus("worker-initialized", WorkflowJob.STATUS_INITIATED);
             this.code = Integer.valueOf(entry.hashCode());
             this.setPriority(Thread.MIN_PRIORITY); // http requests from the crawler should not cause that other functions work worse
+            this.profile = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
         }
 
         private long age() {
@@ -637,6 +639,7 @@ public class CrawlQueues {
                     //if (log.isFine()) log.logFine("Crawling of URL '" + request.url().toString() + "' disallowed by robots.txt.");
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,
@@ -652,8 +655,7 @@ public class CrawlQueues {
                 // returns null if everything went fine, a fail reason string if a problem occurred
                 try {
                     this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
-                    final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
-                    final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
+                    final Response response = CrawlQueues.this.sb.loader.load(this.request, profile == null ? CacheStrategy.IFEXIST : profile.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     if (response == null) {
                         this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                         if (CrawlQueues.this.log.isFine()) {
@@ -677,6 +679,7 @@ public class CrawlQueues {
                     if (result != null) {
                         CrawlQueues.this.errorURL.push(
                                 this.request,
+                                profile,
                                 ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                                 new Date(),
                                 1,
@@ -690,6 +693,7 @@ public class CrawlQueues {
                 } catch (final Exception e) {
                     CrawlQueues.this.errorURL.push(
                             this.request,
+                            profile,
                             ASCII.getBytes(CrawlQueues.this.sb.peers.mySeed().hash),
                             new Date(),
                             1,

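A note on the CrawlQueues hunks: the Loader worker previously looked the profile up ad hoc inside its load routine (the removed local "final CrawlProfile e = ..."); it now resolves the profile once in the constructor and keeps it in a final field, so the same object drives both the cache-strategy choice and every errorURL.push() call. A minimal sketch of that pattern, with unrelated fields and error handling elided (YaCy's classes assumed on the classpath):

    private final class Loader extends Thread {
        private Request request;
        private final CrawlProfile profile; // resolved once, reused everywhere

        private Loader(final Request entry) {
            this.request = entry;
            // one getActive() lookup instead of repeating it at each use site
            this.profile = sb.crawler.getActive(UTF8.getBytes(entry.profileHandle()));
        }
    }
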
@@ -172,6 +172,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
 
     public void push(
             final Request bentry,
+            final CrawlProfile profile,
             final byte[] executor,
             final Date workdate,
             final int workcount,
@@ -190,7 +191,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
         if (this.fulltext.getDefaultConnector() != null && failCategory.store) {
             // send the error to solr
             try {
-                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
+                SolrInputDocument errorDoc = this.fulltext.getDefaultConfiguration().err(bentry.url(), profile == null ? null : profile.collections(), failCategory.name() + " " + reason, failCategory.failType, httpcode);
                 this.fulltext.getDefaultConnector().add(errorDoc);
             } catch (final IOException e) {
                 ConcurrentLog.warn("SOLR", "failed to send error " + bentry.url().toNormalform(true) + " to solr: " + e.getMessage());

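These ZURL hunks are the heart of the commit: push() gains a CrawlProfile parameter (several callers, e.g. crawlReceipt and MediaSnippet, simply pass null), and the profile's collection names are forwarded into the Solr error document. Reconstructed from the call sites in this diff, the new signature reads roughly as follows; the parameter names after workcount are inferred from the arguments (a FailCategory, a reason string, an HTTP status) and are therefore approximate:

    public void push(
            final Request bentry,        // the request that failed
            final CrawlProfile profile,  // new; may be null when no profile is known
            final byte[] executor,       // hash of the executing peer
            final Date workdate,
            final int workcount,
            final FailCategory failCategory,
            final String reason,
            final int httpcode) {
        // ... inside, the collections reach the error document via
        // err(bentry.url(), profile == null ? null : profile.collections(), ...)
    }
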
@@ -101,6 +101,7 @@ public class FTPLoader {
         // create new ftp client
         final FTPClient ftpClient = new FTPClient();
 
+        final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
         // get a connection
         if (openConnection(ftpClient, entryUrl)) {
             // test if the specified file is a directory
@@ -130,7 +131,6 @@ public class FTPLoader {
                     final ResponseHeader responseHeader = new ResponseHeader(200);
                     responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
                     responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
-                    final CrawlProfile profile = this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
                     response = new Response(
                             request,
                             requestHeader,
@@ -156,7 +156,7 @@ public class FTPLoader {
         if (berr.size() > 0 || response == null) {
             // some error logging
             final String detail = (berr.size() > 0) ? "Errorlog: " + berr.toString() : "";
-            this.sb.crawlQueues.errorURL.push(request, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, ASCII.getBytes(this.sb.peers.mySeed().hash), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, " ftp server download, " + detail, -1);
             throw new IOException("FTPLoader: Unable to download URL '" + request.url().toString() + "': " + detail);
         }
 

@@ -70,20 +70,20 @@ public final class HTTPLoader {
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 30000);
     }
 
-    public Response load(final Request entry, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    public Response load(final Request entry, CrawlProfile profile, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
         Latency.updateBeforeLoad(entry.url());
         final long start = System.currentTimeMillis();
-        final Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
+        final Response doc = load(entry, profile, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, blacklistType, timeout);
         Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
         return doc;
     }
 
-    private Response load(final Request request, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
+    private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, int timeout) throws IOException {
 
         byte[] myHash = ASCII.getBytes(this.sb.peers.mySeed().hash);
 
         if (retryCount < 0) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
             throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
         }
 
@@ -99,7 +99,7 @@ public final class HTTPLoader {
         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
         if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
@@ -146,7 +146,7 @@ public final class HTTPLoader {
             redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
 
             if (redirectionUrlString.isEmpty()) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
                 throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
             }
 
@@ -160,32 +160,32 @@ public final class HTTPLoader {
             this.sb.webStructure.generateCitationReference(url, redirectionUrl);
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
             }
 
             if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
                 // if we are already doing a shutdown we don't need to retry crawling
                 if (Thread.currentThread().isInterrupted()) {
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
                     throw new IOException("CRAWLER Retry of URL=" + requestURLString + " aborted because of server shutdown.");
                 }
 
                 // check if the url was already loaded
                 if (Cache.has(redirectionUrl.hash())) { // customer request
-                    this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
+                    this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection to double content", statusCode);
                     throw new IOException("CRAWLER Redirection of URL=" + requestURLString + " ignored. The url appears already in htcache");
                 }
 
                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
-                return load(request, retryCount - 1, maxFileSize, blacklistType, timeout);
+                return load(request, profile, retryCount - 1, maxFileSize, blacklistType, timeout);
             }
             // we don't want to follow redirects
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
             throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (responseBody == null) {
             // no response, reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "no response body", statusCode);
             throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         } else if (statusCode == 200 || statusCode == 203) {
             // the transfer is ok
 
@@ -196,12 +196,11 @@ public final class HTTPLoader {
 
             // check length again in case it was not possible to get the length before loading
             if (maxFileSize >= 0 && contentLength > maxFileSize) {
-                this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
+                this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.FINAL_PROCESS_CONTEXT, "file size limit exceeded", statusCode);
                 throw new IOException("REJECTED URL " + request.url() + " because file size '" + contentLength + "' exceeds max filesize limit of " + maxFileSize + " bytes. (GET)");
             }
 
             // create a new cache entry
-            final CrawlProfile profile = request.profileHandle() == null ? null : this.sb.crawler.getActive(ASCII.getBytes(request.profileHandle()));
             response = new Response(
                     request,
                     requestHeader,
@@ -214,7 +213,7 @@ public final class HTTPLoader {
             return response;
         } else {
             // if the response has not the right response type then reject file
-            this.sb.crawlQueues.errorURL.push(request, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
+            this.sb.crawlQueues.errorURL.push(request, profile, myHash, new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
             throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + requestURLString);
         }
     }

@@ -186,15 +186,15 @@ public final class LoaderDispatcher {
         if (url.isFile() || url.isSMB()) cacheStrategy = CacheStrategy.NOCACHE; // load just from the file system
         final String protocol = url.getProtocol();
         final String host = url.getHost();
 
+        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
         // check if url is in blacklist
         if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
-            this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            this.sb.crawlQueues.errorURL.push(request, crawlProfile, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
 
         // check if we have the page in the cache
-        final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
         if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) {
             // we have passed a first test if caching is allowed
             // now see if there is a cache entry
@@ -280,7 +280,7 @@ public final class LoaderDispatcher {
         // load resource from the internet
         Response response = null;
         if (protocol.equals("http") || protocol.equals("https")) {
-            response = this.httpLoader.load(request, maxFileSize, blacklistType, timeout);
+            response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, timeout);
         } else if (protocol.equals("ftp")) {
             response = this.ftpLoader.load(request, true);
         } else if (protocol.equals("smb")) {

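HTTPLoader and LoaderDispatcher together show the threading pattern used throughout the commit: the dispatcher resolves the profile from the request's profile handle once, reuses it for the blacklist error report and the cache check, and hands it to httpLoader.load(), whose public and private overloads forward it to every errorURL.push() and into the recursive redirect retry. A condensed sketch of the call chain, with names as in the diff:

    // LoaderDispatcher: resolve the profile once ...
    final CrawlProfile crawlProfile = request.profileHandle() == null
            ? null
            : this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
    // ... and hand it down to the protocol loader:
    response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, timeout);

    // HTTPLoader: the profile also travels with the redirect retry recursion:
    // return load(request, profile, retryCount - 1, maxFileSize, blacklistType, timeout);
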
@@ -1803,6 +1803,7 @@ public final class Switchboard extends serverSwitch {
                 //if (log.isFine()) log.logFine("deQueue: not indexed any word in URL " + response.url() + "; cause: " + noIndexReason);
                 addURLtoErrorDB(
                         response.url(),
+                        response.profile(),
                         (referrerURL == null) ? null : referrerURL.hash(),
                         response.initiator(),
                         response.name(),
@@ -2474,6 +2475,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("the resource '" + response.url() + "' is missing in the cache.");
             addURLtoErrorDB(
                     response.url(),
+                    response.profile(),
                     response.referrerHash(),
                     response.initiator(),
                     response.name(),
@@ -2498,6 +2500,7 @@ public final class Switchboard extends serverSwitch {
             this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
             addURLtoErrorDB(
                     response.url(),
+                    response.profile(),
                     response.referrerHash(),
                     response.initiator(),
                     response.name(),
@@ -2597,6 +2600,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                     in.queueEntry.url(),
+                    profile,
                     in.queueEntry.referrerHash(),
                     in.queueEntry.initiator(),
                     in.queueEntry.name(),
@@ -2612,6 +2616,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': denied by document-attached noindexing rule");
             addURLtoErrorDB(
                     in.queueEntry.url(),
+                    profile,
                     in.queueEntry.referrerHash(),
                     in.queueEntry.initiator(),
                     in.queueEntry.name(),
@@ -2624,6 +2629,7 @@ public final class Switchboard extends serverSwitch {
             if (this.log.isInfo()) this.log.info("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on content; indexContentMustMatchPattern = " + profile.indexContentMustMatchPattern().pattern() + ", indexContentMustNotMatchPattern = " + profile.indexContentMustNotMatchPattern().pattern());
             addURLtoErrorDB(
                     in.queueEntry.url(),
+                    profile,
                     in.queueEntry.referrerHash(),
                     in.queueEntry.initiator(),
                     in.queueEntry.name(),
@@ -2707,6 +2713,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by rule in document, process case=" + processCase);
             addURLtoErrorDB(
                     url,
+                    profile,
                     (referrerURL == null) ? null : referrerURL.hash(),
                     queueEntry.initiator(),
                     dc_title,
@@ -2719,6 +2726,7 @@ public final class Switchboard extends serverSwitch {
             //if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
             addURLtoErrorDB(
                     url,
+                    profile,
                     (referrerURL == null) ? null : referrerURL.hash(),
                     queueEntry.initiator(),
                     dc_title,
@@ -3361,6 +3369,7 @@ public final class Switchboard extends serverSwitch {
 
     private void addURLtoErrorDB(
             final DigestURI url,
+            final CrawlProfile profile,
             final byte[] referrerHash,
             final byte[] initiator,
             final String name,
@@ -3380,7 +3389,7 @@ public final class Switchboard extends serverSwitch {
                 0,
                 0,
                 0);
-        this.crawlQueues.errorURL.push(bentry, initiator, new Date(), 0, failCategory, failreason, -1);
+        this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
     }
 
     public final void heuristicSite(final SearchEvent searchEvent, final String host) {

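For completeness, the private Switchboard helper that funnels indexing-time failures (parse errors, cache misses, profile rejections) into the error queue now carries the profile too. Its signature, reconstructed from the hunks above; the trailing failCategory and failreason parameters are inferred from the push() call and may not match the source exactly:

    private void addURLtoErrorDB(
            final DigestURI url,
            final CrawlProfile profile,   // new parameter
            final byte[] referrerHash,
            final byte[] initiator,
            final String name,
            final FailCategory failCategory,
            final String failreason) {
        // builds a Request bentry from the arguments, then:
        // this.crawlQueues.errorURL.push(bentry, profile, initiator, new Date(), 0, failCategory, failreason, -1);
    }
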
@@ -64,6 +64,7 @@ import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
+import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -1191,7 +1192,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Serializable {
      * @param httpstatus
      * @throws IOException
      */
-    public SolrInputDocument err(final DigestURI digestURI, final String failReason, final FailType failType, final int httpstatus) throws IOException {
+    public SolrInputDocument err(final DigestURI digestURI, String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
         final SolrInputDocument solrdoc = new SolrInputDocument();
         add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash()));
         add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true));
@@ -1212,6 +1213,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Serializable {
         if (contains(CollectionSchema.failreason_s)) add(solrdoc, CollectionSchema.failreason_s, failReason);
         if (contains(CollectionSchema.failtype_s)) add(solrdoc, CollectionSchema.failtype_s, failType.name());
         if (contains(CollectionSchema.httpstatus_i)) add(solrdoc, CollectionSchema.httpstatus_i, httpstatus);
+        if (contains(CollectionSchema.collection_sxt)) add(solrdoc, CollectionSchema.collection_sxt, collections);
         return solrdoc;
     }
 

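On the schema side, err() now accepts the collection names and writes them to collection_sxt whenever that field is enabled in the schema configuration. A hypothetical call, assuming a CollectionConfiguration config, a DigestURI url, a CrawlProfile profile, a FailCategory failCategory, a String reason and a FailType value ft are in scope (concrete enum constants elided):

    final SolrInputDocument doc = config.err(
            url,
            profile == null ? null : profile.collections(), // new argument -> collection_sxt
            failCategory.name() + " " + reason,             // failReason, as ZURL builds it
            ft,                                             // failType
            -1);                                            // httpstatus when none is known
    // Besides id, sku, failreason_s, failtype_s and httpstatus_i, the error
    // document now carries collection_sxt, so error entries can be queried
    // or cleaned up per crawl collection.
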
@@ -259,7 +259,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
 
         // check if url is in blacklist
         if (Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile())) {
-            Switchboard.getSwitchboard().crawlQueues.errorURL.push(new Request(url, null), ASCII.getBytes(Switchboard.getSwitchboard().peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
+            Switchboard.getSwitchboard().crawlQueues.errorURL.push(new Request(url, null), null, ASCII.getBytes(Switchboard.getSwitchboard().peers.mySeed().hash), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
             ConcurrentLog.fine("snippet fetch", "MEDIA-SNIPPET Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
             isBlacklisted = true;
         }