Do not cache search requests to Solr if the result is used for
double-checking. If a double-check comes from cached results, the
double-check fails.
This commit is contained in:
Michael Peter Christen 2014-11-20 18:45:27 +01:00
parent a0b84e4def
commit 70f03f7c8e
5 changed files with 8 additions and 8 deletions

View File

@ -161,7 +161,7 @@ public class IndexDeletion_p {
String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/";
if (simulate) {
try {
count = (int) defaultConnector.getCountByQuery(regexquery);
count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery);
} catch (final IOException e) {
}
prop.put("urldelete-active", count == 0 ? 2 : 1);

View File

@ -2558,7 +2558,7 @@ public final class Switchboard extends serverSwitch {
throw new Parser.Failure("Parser returned null.", response.url());
}
} catch (final Parser.Failure e ) {
this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
this.log.warn("Unable to parse the resource '" + response.url().toNormalform(true) + "'. " + e.getMessage());
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(response.url(), response.depth(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1);
return null;

View File

@ -401,7 +401,7 @@ public final class Fulltext {
for (String hosthash: subset) {
if (query.length() > 0) query.append(" OR ");
//query.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append(":\"");
query.append("({!raw f=").append(fieldname).append('}').append(hosthash).append(")");
query.append("({!cache=false raw f=").append(fieldname).append('}').append(hosthash).append(")");
}
if (constraintQuery == null) connector.deleteByQuery(query.toString()); else connector.deleteByQuery("(" + query.toString() + ") AND " + constraintQuery);
} catch (final IOException e) {

View File

@ -275,7 +275,7 @@ public class Segment {
if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
// reqd the references from the webgraph
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!cache=false raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
SolrDocument doc;
try {
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {

View File

@ -1078,7 +1078,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
long patchquerycount = collectionConnector.getCountByQuery(patchquery);
long patchquerycount = collectionConnector.getCountByQuery("{!cache=false}" + patchquery);
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, true,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
SolrDocument doc_B;
@ -1172,7 +1172,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
postprocessingActivity = "writing cr values to webgraph for host " + host;
ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
String patchquery = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\" AND " + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(patchquery);
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + patchquery);
int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4));
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency);
final BlockingQueue<SolrDocument> docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(
@ -1587,7 +1587,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
doccountterm.addOperand(new LongLiteral(signaturefield, signature));
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountterm.toString());
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery("{!cache=false}" + doccountterm.toString());
sid.setField(uniquefield.getSolrFieldName(), doccount == 0);
} catch (final IOException e) {}
}
@ -1682,7 +1682,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
this.crt = new ConcurrentHashMap<String, double[]>();
try {
// select all documents for each host
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!cache=false raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
this.crt.put(id, new double[]{0.0d,0.0d}); //{old value, new value}