mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Do not cache search requests to Solr if the result is used for
double-checking. If a double-check is answered from cached results, the double-check fails.
This commit is contained in:
parent
a0b84e4def
commit
70f03f7c8e
|
@ -161,7 +161,7 @@ public class IndexDeletion_p {
|
|||
String regexquery = CollectionSchema.sku.getSolrFieldName() + ":/" + urldelete + "/";
|
||||
if (simulate) {
|
||||
try {
|
||||
count = (int) defaultConnector.getCountByQuery(regexquery);
|
||||
count = (int) defaultConnector.getCountByQuery("{!cache=false}" + regexquery);
|
||||
} catch (final IOException e) {
|
||||
}
|
||||
prop.put("urldelete-active", count == 0 ? 2 : 1);
|
||||
|
|
|
@ -2558,7 +2558,7 @@ public final class Switchboard extends serverSwitch {
|
|||
throw new Parser.Failure("Parser returned null.", response.url());
|
||||
}
|
||||
} catch (final Parser.Failure e ) {
|
||||
this.log.warn("Unable to parse the resource '" + response.url() + "'. " + e.getMessage());
|
||||
this.log.warn("Unable to parse the resource '" + response.url().toNormalform(true) + "'. " + e.getMessage());
|
||||
// create a new errorURL DB entry
|
||||
this.crawlQueues.errorURL.push(response.url(), response.depth(), response.profile(), FailCategory.FINAL_PROCESS_CONTEXT, e.getMessage(), -1);
|
||||
return null;
|
||||
|
|
|
@ -401,7 +401,7 @@ public final class Fulltext {
|
|||
for (String hosthash: subset) {
|
||||
if (query.length() > 0) query.append(" OR ");
|
||||
//query.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append(":\"");
|
||||
query.append("({!raw f=").append(fieldname).append('}').append(hosthash).append(")");
|
||||
query.append("({!cache=false raw f=").append(fieldname).append('}').append(hosthash).append(")");
|
||||
}
|
||||
if (constraintQuery == null) connector.deleteByQuery(query.toString()); else connector.deleteByQuery("(" + query.toString() + ") AND " + constraintQuery);
|
||||
} catch (final IOException e) {
|
||||
|
|
|
@ -275,7 +275,7 @@ public class Segment {
|
|||
if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
|
||||
// read the references from the webgraph
|
||||
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
|
||||
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
|
||||
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!cache=false raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, false, WebgraphSchema.source_id_s.getSolrFieldName());
|
||||
SolrDocument doc;
|
||||
try {
|
||||
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
|
||||
|
|
|
@ -1078,7 +1078,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
|
|||
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
|
||||
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
|
||||
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
|
||||
long patchquerycount = collectionConnector.getCountByQuery(patchquery);
|
||||
long patchquerycount = collectionConnector.getCountByQuery("{!cache=false}" + patchquery);
|
||||
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, Long.MAX_VALUE, 20, 1, true,
|
||||
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
|
||||
SolrDocument doc_B;
|
||||
|
@ -1172,7 +1172,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
|
|||
postprocessingActivity = "writing cr values to webgraph for host " + host;
|
||||
ConcurrentLog.info("CollectionConfiguration", postprocessingActivity);
|
||||
String patchquery = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\" AND " + WebgraphSchema.process_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM;
|
||||
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery(patchquery);
|
||||
final long count = segment.fulltext().getWebgraphConnector().getCountByQuery("{!cache=false}" + patchquery);
|
||||
int concurrency = Math.min((int) count, Math.max(1, Runtime.getRuntime().availableProcessors() / 4));
|
||||
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph, concurrency = " + concurrency);
|
||||
final BlockingQueue<SolrDocument> docs = segment.fulltext().getWebgraphConnector().concurrentDocumentsByQuery(
|
||||
|
@ -1587,7 +1587,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
|
|||
doccountterm.addOperand(new Negation(new StringLiteral(CollectionSchema.id, urlhash)));
|
||||
doccountterm.addOperand(new StringLiteral(CollectionSchema.host_id_s, hostid));
|
||||
doccountterm.addOperand(new LongLiteral(signaturefield, signature));
|
||||
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery(doccountterm.toString());
|
||||
long doccount = segment.fulltext().getDefaultConnector().getCountByQuery("{!cache=false}" + doccountterm.toString());
|
||||
sid.setField(uniquefield.getSolrFieldName(), doccount == 0);
|
||||
} catch (final IOException e) {}
|
||||
}
|
||||
|
@ -1682,7 +1682,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
|
|||
this.crt = new ConcurrentHashMap<String, double[]>();
|
||||
try {
|
||||
// select all documents for each host
|
||||
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
|
||||
BlockingQueue<String> ids = connector.concurrentIDsByQuery("{!cache=false raw f=" + CollectionSchema.host_s.getSolrFieldName() + "}" + host, CollectionSchema.url_chars_i.getSolrFieldName() + " asc", 0, 100000000, 86400000, 200, 1);
|
||||
String id;
|
||||
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
|
||||
this.crt.put(id, new double[]{0.0d,0.0d}); //{old value, new value}
|
||||
|
|
Loading…
Reference in New Issue
Block a user