Use the Solr search index to search concurrently within Solr and the

RWIs during local search requests.
This commit is contained in:
Michael Peter Christen 2012-08-17 01:21:56 +02:00
parent 6197caf698
commit 395b78a0d8
3 changed files with 72 additions and 4 deletions

View File

@ -33,7 +33,10 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.logging.Log;
@ -45,6 +48,8 @@ import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ByteArray;
import org.apache.solr.common.SolrDocument;
public class WordReferenceVars extends AbstractReference implements WordReference, Reference, Cloneable, Comparable<WordReferenceVars>, Comparator<WordReferenceVars> {
@ -55,12 +60,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private static int cores = Runtime.getRuntime().availableProcessors();
public static final byte[] default_language = UTF8.getBytes("uk");
public Bitfield flags;
public final Bitfield flags;
public long lastModified;
public byte[] language;
public byte[] urlHash;
public final byte[] language;
public final byte[] urlHash;
private String hostHash = null;
public char type;
public final char type;
public int hitcount, llocal, lother, phrasesintext,
posinphrase, posofphrase,
urlcomps, urllength,
@ -69,6 +74,31 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
private final Queue<Integer> positions;
public double termFrequency;
/**
 * Builds a word reference from a Solr search hit.
 *
 * The document is wrapped in a {@link URIMetadataNode}; the metadata getters
 * supply language, flags, modification date, url hash, document type and the
 * local/other link counts. URL statistics are derived from the normal form of
 * the document url. All word-level statistics are set to the placeholder
 * value 1 and a single word position 1 is recorded.
 *
 * @param doc the Solr document describing one search hit
 */
public WordReferenceVars(final SolrDocument doc) {
    final URIMetadata metadata = new URIMetadataNode(doc);

    // attributes taken directly from the document metadata
    this.language = metadata.language();
    this.flags = metadata.flags();
    this.lastModified = metadata.moddate().getTime();
    this.urlHash = metadata.hash();
    this.type = metadata.doctype();
    this.llocal = metadata.llocal();
    this.lother = metadata.lother();

    // a single placeholder word position
    this.positions = new LinkedBlockingQueue<Integer>();
    this.positions.add(1);

    // url statistics are computed on the normal form of the url
    final String normalizedUrl = metadata.url().toNormalform(true, false);
    this.urlcomps = MultiProtocolURI.urlComps(normalizedUrl).length;
    this.urllength = normalizedUrl.length();

    // compute that later
    this.virtualAge = -1;

    // the following fields cannot easily be computed here and are just filled with dummy values
    this.phrasesintext = 1;
    this.hitcount = 1;
    this.posinphrase = 1;
    this.posofphrase = 1;
    this.wordsintext = 1;
    this.wordsintitle = 1;
    this.termFrequency = 1;
}
public WordReferenceVars(
final byte[] urlHash,
final int urlLength, // byte-length of complete URL

View File

@ -323,6 +323,15 @@ public final class QueryParams {
return this.domType == Searchdom.LOCAL;
}
/**
 * Assembles a query string for the {@code text_t} field from the include
 * and exclude words of this query.
 *
 * Every include word is appended with a leading {@code '+'}, every exclude
 * word with a leading {@code "+-"}; the very first {@code '+'} is cut off
 * before the {@code "text_t:"} prefix is put in front.
 *
 * @return the query string, or {@code null} if there are no include words
 */
public String solrQuery() {
    if (this.query_include_words == null || this.query_include_words.size() == 0) {
        return null;
    }

    final StringBuilder terms = new StringBuilder(80);
    for (final String included : this.query_include_words) {
        terms.append('+').append(included);
    }
    for (final String excluded : this.query_exclude_words) {
        terms.append("+-").append(excluded);
    }

    if (terms.length() == 0) {
        return null;
    }
    // drop the leading '+' that was written in front of the first word
    return "text_t:" + terms.substring(1);
}
public static HandleSet hashes2Set(final String query) {
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
if (query != null) {

View File

@ -26,6 +26,7 @@
package net.yacy.search.query;
import java.io.IOException;
import java.util.Comparator;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
@ -74,6 +75,10 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ResultEntry;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
@ -193,6 +198,30 @@ public final class RWIProcess extends Thread
@Override
public void run() {
// start a concurrent solr search
if (this.query.query_include_words != null) {
Thread solrSearch = new Thread() {
/**
 * Runs one query against the Solr index of the query's segment and feeds
 * the hits into the enclosing ranking process.
 *
 * At most 20 documents are requested. Each returned document is converted
 * into a {@link WordReferenceVars} and collected in a reference container,
 * which is then handed to {@code RWIProcess.this.add(...)} under the
 * resource name "embedded solr". Failures are logged and end this thread;
 * they are not propagated.
 */
@Override
public void run() {
    Thread.currentThread().setName("SearchEvent.solrSearch");
    final String solrQuery = RWIProcess.this.query.solrQuery();
    // solrQuery() yields null when there are no include words; nothing to ask solr in that case
    if (solrQuery == null) {
        return;
    }
    try {
        final ReferenceContainer<WordReference> wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
        final SolrDocumentList sdl = RWIProcess.this.query.getSegment().getSolr().query(solrQuery, 0, 20);
        for (final SolrDocument d : sdl) {
            try {
                wr.add(new WordReferenceVars(d));
            } catch (final SpaceExceededException e) {
                // best effort: this hit is skipped, the remaining hits are still added
                Log.logException(e);
            }
        }
        Log.logInfo("SearchEvent", "added " + wr.size() + " hits from solr to ranking process");
        RWIProcess.this.add(wr, true, "embedded solr", sdl.size(), 60000);
    } catch (final SolrException e) {
        // do not hide solr failures; the search simply proceeds without solr hits
        Log.logException(e);
    } catch (final IOException e) {
        Log.logException(e);
    }
}
};
solrSearch.start();
}
// do a search
oneFeederStarted();