mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Use the Solr search index to search concurrently within Solr and the
RWIs during local search requests.
This commit is contained in:
parent
6197caf698
commit
395b78a0d8
|
@ -33,7 +33,10 @@ import java.util.concurrent.BlockingQueue;
|
|||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.kelondro.data.meta.URIMetadata;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataNode;
|
||||
import net.yacy.kelondro.index.Row;
|
||||
import net.yacy.kelondro.index.Row.Entry;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
|
@ -45,6 +48,8 @@ import net.yacy.kelondro.rwi.Reference;
|
|||
import net.yacy.kelondro.rwi.ReferenceContainer;
|
||||
import net.yacy.kelondro.util.ByteArray;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
|
||||
|
||||
public class WordReferenceVars extends AbstractReference implements WordReference, Reference, Cloneable, Comparable<WordReferenceVars>, Comparator<WordReferenceVars> {
|
||||
|
||||
|
@ -55,12 +60,12 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
|||
private static int cores = Runtime.getRuntime().availableProcessors();
|
||||
public static final byte[] default_language = UTF8.getBytes("uk");
|
||||
|
||||
public Bitfield flags;
|
||||
public final Bitfield flags;
|
||||
public long lastModified;
|
||||
public byte[] language;
|
||||
public byte[] urlHash;
|
||||
public final byte[] language;
|
||||
public final byte[] urlHash;
|
||||
private String hostHash = null;
|
||||
public char type;
|
||||
public final char type;
|
||||
public int hitcount, llocal, lother, phrasesintext,
|
||||
posinphrase, posofphrase,
|
||||
urlcomps, urllength,
|
||||
|
@ -69,6 +74,31 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
|
|||
private final Queue<Integer> positions;
|
||||
public double termFrequency;
|
||||
|
||||
public WordReferenceVars(final SolrDocument doc) {
|
||||
URIMetadata md = new URIMetadataNode(doc);
|
||||
this.language = md.language();
|
||||
this.flags = md.flags();
|
||||
this.lastModified = md.moddate().getTime();
|
||||
this.urlHash = md.hash();
|
||||
this.type = md.doctype();
|
||||
this.llocal = md.llocal();
|
||||
this.lother = md.lother();
|
||||
this.positions = new LinkedBlockingQueue<Integer>();
|
||||
this.positions.add(1);
|
||||
String urlNormalform = md.url().toNormalform(true, false);
|
||||
this.urlcomps = MultiProtocolURI.urlComps(urlNormalform).length;
|
||||
this.urllength = urlNormalform.length();
|
||||
this.virtualAge = -1; // compute that later
|
||||
// the following fields cannot be computed here very easy and are just filled with dummy values
|
||||
this.phrasesintext = 1;
|
||||
this.hitcount = 1;
|
||||
this.posinphrase = 1;
|
||||
this.posofphrase = 1;
|
||||
this.wordsintext = 1;
|
||||
this.wordsintitle = 1;
|
||||
this.termFrequency = 1;
|
||||
}
|
||||
|
||||
public WordReferenceVars(
|
||||
final byte[] urlHash,
|
||||
final int urlLength, // byte-length of complete URL
|
||||
|
|
|
@ -323,6 +323,15 @@ public final class QueryParams {
|
|||
return this.domType == Searchdom.LOCAL;
|
||||
}
|
||||
|
||||
public String solrQuery() {
|
||||
if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
|
||||
StringBuilder sb = new StringBuilder(80);
|
||||
for (String s: this.query_include_words) {sb.append('+'); sb.append(s);}
|
||||
for (String s: this.query_exclude_words) {sb.append("+-"); sb.append(s);}
|
||||
if (sb.length() == 0) return null;
|
||||
return "text_t:" + sb.substring(1, sb.length());
|
||||
}
|
||||
|
||||
public static HandleSet hashes2Set(final String query) {
|
||||
final HandleSet keyhashes = new RowHandleSet(WordReferenceRow.urlEntryRow.primaryKeyLength, WordReferenceRow.urlEntryRow.objectOrder, 0);
|
||||
if (query != null) {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
|
||||
package net.yacy.search.query;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.ConcurrentModificationException;
|
||||
import java.util.HashMap;
|
||||
|
@ -74,6 +75,10 @@ import net.yacy.search.index.Segment;
|
|||
import net.yacy.search.ranking.ReferenceOrder;
|
||||
import net.yacy.search.snippet.ResultEntry;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||
import com.hp.hpl.jena.rdf.model.Resource;
|
||||
|
||||
|
@ -193,6 +198,30 @@ public final class RWIProcess extends Thread
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
|
||||
// start a concurrent solr search
|
||||
if (this.query.query_include_words != null) {
|
||||
Thread solrSearch = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
Thread.currentThread().setName("SearchEvent.solrSearch");
|
||||
String solrQuery = RWIProcess.this.query.solrQuery();
|
||||
try {
|
||||
ReferenceContainer<WordReference> wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
|
||||
SolrDocumentList sdl = RWIProcess.this.query.getSegment().getSolr().query(solrQuery, 0, 20);
|
||||
for (SolrDocument d : sdl) {
|
||||
try {wr.add(new WordReferenceVars(d));} catch (SpaceExceededException e) {}
|
||||
}
|
||||
Log.logInfo("SearchEvent", "added " + wr.size() + " hits from solr to ranking process");
|
||||
RWIProcess.this.add(wr, true, "embedded solr", sdl.size(), 60000);
|
||||
} catch (SolrException e) {
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
};
|
||||
solrSearch.start();
|
||||
}
|
||||
|
||||
// do a search
|
||||
oneFeederStarted();
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user