Do not cache search requests to Solr if the result is used for
double-checking. If a double-check comes from cached results, the
double-check fails.
This commit is contained in:
Michael Peter Christen 2014-11-20 18:45:27 +01:00
parent a0b84e4def
commit 70f03f7c8e
5 changed files with 8 additions and 8 deletions

View File

@ -161,7 +161,7 @@ public class IndexDeletion_p {
String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/";
if (simulate) {
try {
count = (int) defaultConnector.getCountByQuery(regexquery);
count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery);
} catch (final IOException e) {
}
prop.put("urldelete-active", count == 0 ? 2 : 1);

View File

@ -2558,7 +2558,7 @@ public final class Switchboard extends serverSwitch {
throw new Parser.Failure("Parser returned null.", response.url());
}
} catch (final Parser.Failure e ) {
this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
this.log.warn("Unable to parse the resource '" + response.url().toNormalform(true) + "'. " + e.getMessage());
// create a new errorURL DB entry
this.crawlQueues.errorURL.push(response.url(), response.depth(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1);
return null;

View File

@ -401,7 +401,7 @@ public final class Fulltext {
for (String hosthash: subset) {
if (query.length() > 0) query.append(" OR ");
//query.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append(":\"");
query.append("({!raw f=").append(fieldname).append('}').append(hosthash).append(")");
query.append("({!cache=false raw f=").append(fieldname).append('}').append(hosthash).append(")");
}
if (constraintQuery == null) connector.deleteByQuery(query.toString()); else connector.deleteByQuery("(" + query.toString() + ") AND " + constraintQuery);
} catch (final IOException e) {

View File

@ -275,7 +275,7 @@ public class Segment {
if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
// reqd the references from the webgraph
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!cache=false raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
SolrDocument doc;
try {
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {

View File

@ -1078,7 +1078,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
long patchquerycount = collectionConnector.getCountByQuery(patchquery);
long patchquerycount = collectionConnector.getCountByQuery("{!cache=false}" + patchquery);
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, true,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
SolrDocument doc_B;
@ -1172,7 +1172,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
postprocessingActivity = "writing cr values to webgraph for host " + host;
ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
String patchquery = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\" AND " + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(patchquery);
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + patchquery);
int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4));
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency);
final BlockingQueue<SolrDocument> docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(
@ -1587,7 +1587,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
doccountterm.addOperand(new LongLiteral(signaturefield, signature));
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountterm.toString());
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery("{!cache=false}" + doccountterm.toString());
sid.setField(uniquefield.getSolrFieldName(), doccount == 0);
} catch (final IOException e) {}
}
@ -1682,7 +1682,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
this.crt = new ConcurrentHashMap<String, double[]>();
try {
// select all documents for each host
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!cache=false raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
this.crt.put(id, new double[]{0.0d,0.0d}); //{old value, new value}