/**
 *  ErrorCache
 *  Copyright 2013 by Michael Peter Christen
 *  First released 17.10.2013 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.search.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;

import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.search.index.Fulltext;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;

/**
 * Keeps track of documents that failed to load or index.
 *
 * Failures are held in a bounded in-memory stack (a {@link LinkedHashMap} keyed by the
 * url hash as ASCII string, at most {@link #maxStackSize} entries, oldest entries are
 * dropped first) and - if the fail category asks for it - written to the default Solr
 * connector of the given {@link Fulltext} as documents carrying a fail reason.
 *
 * All access to the in-memory stack is synchronized on the stack itself.
 */
public class ErrorCache {

    private static final ConcurrentLog log = new ConcurrentLog("REJECTED");

    /** maximum number of fail documents kept in the in-memory stack */
    private static final int maxStackSize = 1000;

    /** number of most recent fail documents fetched from the index when the cache is created */
    private static final int initialFillSize = 100;

    // the class object
    private final Map<String, CollectionConfiguration.FailDoc> stack; // url hash (ASCII) -> fail document, in insertion order
    private final Fulltext fulltext;

    /**
     * Creates the cache and pre-fills the in-memory stack with the most recently
     * modified fail documents found in the index. The pre-fill is best-effort: if it
     * fails for any reason the cache simply starts empty.
     *
     * @param fulltext the fulltext index whose default connector stores the fail documents
     */
    public ErrorCache(final Fulltext fulltext) {
        this.fulltext = fulltext;
        this.stack = new LinkedHashMap<String, CollectionConfiguration.FailDoc>();
        fillStackFromIndex();
    }

    /**
     * Loads the latest fail documents from the index into the stack.
     * Never throws; any failure is logged and leaves the stack as it is.
     */
    private void fillStackFromIndex() {
        try {
            if (!hasConnector()) return; // nothing to load from

            // fill stack with latest values
            final SolrQuery params = new SolrQuery();
            params.setParam("defType", "edismax");
            params.setStart(0);
            params.setRows(initialFillSize);
            params.setFacet(false);
            params.setSort(new SortClause(CollectionSchema.last_modified.getSolrFieldName(), SolrQuery.ORDER.desc));
            params.setQuery(failedDocumentsQuery());

            final QueryResponse rsp = this.fulltext.getDefaultConnector().getResponseByParams(params);
            final SolrDocumentList docList = rsp == null ? null : rsp.getResults();
            if (docList == null) return;

            // the result is sorted newest-first; insert in reverse so that the newest
            // document is inserted last and therefore is evicted last
            for (int i = docList.size() - 1; i >= 0; i--) {
                final CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(docList.get(i));
                this.stack.put(ASCII.String(failDoc.getDigestURL().hash()), failDoc);
            }
        } catch (final Throwable e) {
            // deliberately broad: a failing pre-fill must never prevent construction,
            // but it should not pass unnoticed either
            ConcurrentLog.warn("SOLR", "failed to pre-load the error cache: " + e.getMessage());
        }
    }

    /**
     * @return true if the fulltext index currently provides a default connector
     */
    private boolean hasConnector() {
        return this.fulltext.getDefaultConnector() != null;
    }

    /**
     * @return the Solr query matching every document which has a fail reason.
     *         Computed on each call because the field name is looked up from the schema.
     */
    private static String failedDocumentsQuery() {
        return CollectionSchema.failreason_s.getSolrFieldName() + ":[* TO *]";
    }

    /**
     * Removes all fail documents: empties the in-memory stack and deletes every
     * document with a fail reason from the index (if a connector is available).
     *
     * @throws IOException if the deletion in the index fails
     */
    public void clear() throws IOException {
        synchronized (this.stack) {
            this.stack.clear();
        }
        if (hasConnector()) {
            this.fulltext.getDefaultConnector().deleteByQuery(failedDocumentsQuery());
        }
    }

    /**
     * Removes all fail documents of one host from the index and from the in-memory stack.
     * If the deletion in the index fails, the failure is logged and the stack is left untouched.
     *
     * @param hosthash the host hash; a null value is ignored
     */
    public void removeHost(final byte[] hosthash) {
        if (hosthash == null) return;
        try {
            if (hasConnector()) {
                this.fulltext.getDefaultConnector().deleteByQuery(
                        CollectionSchema.host_id_s.getSolrFieldName() + ":\"" + ASCII.String(hosthash) + "\" AND " + failedDocumentsQuery());
            }
            synchronized (this.stack) {
                final Iterator<String> i = this.stack.keySet().iterator();
                while (i.hasNext()) {
                    final String urlhash = i.next();
                    // compares 6 bytes: hosthash from offset 0 against the url hash from offset 6
                    // (presumably the host part of a url hash - verify against DigestURL)
                    if (NaturalOrder.naturalOrder.equal(hosthash, 0, ASCII.getBytes(urlhash), 6, 6)) i.remove();
                }
            }
        } catch (final IOException e) {
            ConcurrentLog.warn("SOLR", "failed to remove errors of host " + ASCII.String(hosthash) + " from solr: " + e.getMessage());
        }
    }

    /**
     * Records a failure for the given url. Does nothing if the index already holds a
     * fail document for that url. Otherwise the failure is logged, put on the in-memory
     * stack and - if {@code failCategory.store} is set and a connector is available -
     * written to the index. Finally the stack is trimmed to its maximum size.
     *
     * @param url          the url that failed
     * @param profile      the crawl profile the url belongs to, may be null
     * @param failCategory the category of the failure
     * @param anycause     a description of the cause; null is replaced by "unknown"
     * @param httpcode     the http return code; a negative value is not added to the reason
     */
    public void push(final DigestURL url, final CrawlProfile profile, final FailCategory failCategory, String anycause, final int httpcode) {
        // assert executor != null; // null == proxy !
        assert failCategory.store || httpcode == -1 : "failCategory=" + failCategory.name();
        if (exists(url.hash())) return; // don't insert double causes
        if (anycause == null) anycause = "unknown";
        final String reason = anycause + ((httpcode >= 0) ? " (http return code = " + httpcode + ")" : "");
        if (!reason.startsWith("double")) log.info(url.toNormalform(true) + " - " + reason);

        final CollectionConfiguration.FailDoc failDoc = new CollectionConfiguration.FailDoc(
                url, profile == null ? null : profile.collections(),
                failCategory.name() + " " + reason, failCategory.failType,
                httpcode);
        synchronized (this.stack) {
            this.stack.put(ASCII.String(url.hash()), failDoc);
        }
        if (hasConnector() && failCategory.store) {
            // send the error to solr
            try {
                final SolrInputDocument errorDoc = failDoc.toSolr(this.fulltext.getDefaultConfiguration());
                this.fulltext.getDefaultConnector().add(errorDoc);
            } catch (final IOException e) {
                ConcurrentLog.warn("SOLR", "failed to send error " + url.toNormalform(true) + " to solr: " + e.getMessage());
            }
        }
        checkStackSize();
    }

    /**
     * Trims the stack to at most {@link #maxStackSize} entries by removing entries
     * from the front of the iteration order, i.e. the ones inserted first.
     */
    private void checkStackSize() {
        synchronized (this.stack) {
            int excess = this.stack.size() - maxStackSize;
            final Iterator<String> i = this.stack.keySet().iterator();
            while (excess-- > 0 && i.hasNext()) {
                i.next();
                i.remove();
            }
        }
    }

    /**
     * Lists the most recently stacked fail documents.
     *
     * @param max the maximum number of documents to return; values below zero are treated as zero
     * @return a new list with at most {@code max} documents from the end of the stack, oldest first
     */
    public ArrayList<CollectionConfiguration.FailDoc> list(int max) {
        final ArrayList<CollectionConfiguration.FailDoc> l = new ArrayList<CollectionConfiguration.FailDoc>();
        synchronized (this.stack) {
            // clamp max so that a negative value can neither overflow the subtraction
            // nor run the iterator past its end
            int skip = this.stack.size() - Math.max(0, max);
            final Iterator<CollectionConfiguration.FailDoc> fdi = this.stack.values().iterator();
            while (skip-- > 0 && fdi.hasNext()) fdi.next();
            while (fdi.hasNext()) l.add(fdi.next());
        }
        return l;
    }

    /**
     * Looks up the fail document of a url, first in the in-memory stack and then in the index.
     *
     * @param urlhash the url hash as ASCII string
     * @return the fail document, or null if none was found, no connector is available
     *         or the index lookup failed
     */
    public CollectionConfiguration.FailDoc get(final String urlhash) {
        CollectionConfiguration.FailDoc fd;
        synchronized (this.stack) {
            fd = this.stack.get(urlhash);
        }
        if (fd != null) return fd;
        if (!hasConnector()) return null;
        try {
            final SolrDocument doc = this.fulltext.getDefaultConnector().getDocumentById(urlhash);
            if (doc == null) return null;
            return new CollectionConfiguration.FailDoc(doc);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }

    /**
     * Checks whether the index holds a fail document for the given url.
     * Only the index is consulted, not the in-memory stack.
     *
     * @param urlHash the url hash
     * @return true if such a document exists; false if not, if no connector is
     *         available or if the index query failed
     */
    public boolean exists(final byte[] urlHash) {
        // push() calls this before its own connector check, so guard here as well
        if (!hasConnector()) return false;
        try {
            return this.fulltext.getDefaultConnector().existsByQuery(
                    CollectionSchema.id.getSolrFieldName() + ":\"" + ASCII.String(urlHash) + "\" AND " + failedDocumentsQuery());
        } catch (final IOException e) {
            ConcurrentLog.warn("SOLR", "failed to check error existence of " + ASCII.String(urlHash) + " in solr: " + e.getMessage());
            return false;
        }
    }

    /**
     * Empties the in-memory stack; the index is not touched.
     */
    public void clearStack() {
        synchronized (this.stack) {
            this.stack.clear();
        }
    }

    /**
     * @return the number of fail documents currently held in the in-memory stack
     */
    public int stackSize() {
        synchronized (this.stack) {
            return this.stack.size();
        }
    }

}