Added a 'collection' property attribute in yacysearch.html which can be
used to select between different collections, as defined at crawl
start with the 'collection' attribute. This implements the
ability to prepare search tenants which restrict their search results to
a specific collection. The main use for this (at this time) is to provide
tenants to the yaml4 interface.
This commit is contained in:
orbiter 2013-04-23 20:42:54 +02:00
parent 3e79bd4b1f
commit f7571386a3
5 changed files with 79 additions and 24 deletions

View File

@ -22,7 +22,6 @@ import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.Ranking;
@ -30,11 +29,11 @@ import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.query.QueryModifier;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
@ -150,28 +149,13 @@ public class searchresult {
post.put("hl.simple.post", "</b>");
post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
String[] site = post.remove("site"); // example: col1|col2
//String[] access = post.remove("access");
//String[] entqr = post.remove("entqr");
// add sites operator
String[] site = post.remove("site"); // example: col1|col2
if (site != null && site[0].length() > 0) {
String[] s0 = CommonPattern.VERTICALBAR.split(site[0]);
ArrayList<String> sites = new ArrayList<String>(2);
for (String s: s0) {
s = s.trim().toLowerCase();
if (s.length() > 0) sites.add(s);
}
StringBuilder fq = new StringBuilder(20);
if (sites.size() > 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
for (int i = 1; i < sites.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(i));
}
} else if (sites.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
}
post.put(CommonParams.FQ, fq.toString());
post.put(CommonParams.FQ, QueryModifier.parseCollectionExpression(site[0]));
}
// get the embedded connector

View File

@ -358,6 +358,9 @@ public class yacysearch {
final RankingProfile ranking = sb.getRanking();
final QueryModifier modifier = new QueryModifier();
querystring = modifier.parse(querystring);
// read collection
modifier.collection = post.get("collection", "");
int stp = querystring.indexOf('*');
if (stp >= 0) {

View File

@ -25,20 +25,27 @@
package net.yacy.cora.document;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.lod.vocabulary.Geo;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.CommonPattern;
import net.yacy.document.Document;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
@ -335,4 +342,29 @@ public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMe
size = size / 1024;
return size + " gbyte";
}
/*
public Document toDocument() {
DigestURI url = new DigestURI(this.getLink());
List<String> titles = new ArrayList<String>();
titles.add(this.getTitle());
return new Document(
url,
Classification.ext2mime(url.getFileExtension(), "text/plain"),
"UTF8",
null,
this.getLanguage(),
Token.subject.valueFrom(this.map, ""),
titles,
this.getAuthor(),
this.getCopyright(),
null,
this.getDescription(),
0.0d, 0.0d,
this.getFulltext(),
null,
null,
null,
false);
}
*/
}

View File

@ -20,8 +20,11 @@
package net.yacy.search.query;
import java.util.ArrayList;
import org.apache.solr.common.params.CommonParams;
import net.yacy.cora.util.CommonPattern;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.schema.CollectionSchema;
import net.yacy.server.serverObjects;
@ -30,7 +33,7 @@ import net.yacy.server.serverObjects;
public class QueryModifier {
private final StringBuilder modifier;
public String sitehost, sitehash, filetype, protocol, author;
public String sitehost, sitehash, filetype, protocol, author, collection;
public QueryModifier() {
this.sitehash = null;
@ -38,6 +41,7 @@ public class QueryModifier {
this.filetype = null;
this.protocol = null;
this.author = null;
this.collection = null;
this.modifier = new StringBuilder(20);
}
@ -183,4 +187,30 @@ public class QueryModifier {
}
}
/**
 * Parse a GSA site description string and create a filter query string
 * which is used to restrict the search result to the collections named in
 * the site attribute.
 * <p>
 * The description is split with {@code CommonPattern.VERTICALBAR}
 * (example input: {@code col1|col2}); every part is trimmed and lower-cased,
 * empty parts are dropped. Each remaining name becomes one
 * {@code <collection field>:<name>} clause and the clauses are joined with
 * {@code OR}. When there is more than one clause the whole disjunction is
 * wrapped in parentheses so that the result can be combined safely with
 * other clauses (for example appended after {@code " AND "}) without
 * depending on operator precedence.
 * <p>
 * NOTE(review): collection names are inserted into the query verbatim; query
 * syntax characters inside a name are not escaped. Callers passing untrusted
 * input should confirm that this is acceptable.
 *
 * @param collectionDescription one or more collection names separated by
 *        a vertical bar; may be {@code null}
 * @return a solr query string which shall be used for a filter query; the
 *         empty string if the description is {@code null} or contains no
 *         non-empty collection name
 */
public static String parseCollectionExpression(String collectionDescription) {
    // a missing description simply means "no restriction"
    if (collectionDescription == null) return "";

    final String field = CollectionSchema.collection_sxt.getSolrFieldName();
    final ArrayList<String> sites = new ArrayList<String>(2);
    for (String s : CommonPattern.VERTICALBAR.split(collectionDescription)) {
        // Locale.ROOT keeps the case folding independent of the JVM default locale
        s = s.trim().toLowerCase(java.util.Locale.ROOT);
        if (s.length() > 0) sites.add(s);
    }
    if (sites.isEmpty()) return "";

    final StringBuilder filterQuery = new StringBuilder(20);
    // group the disjunction so that "x AND <result>" keeps its intended meaning
    final boolean grouped = sites.size() > 1;
    if (grouped) filterQuery.append('(');
    for (int i = 0; i < sites.size(); i++) {
        if (i > 0) filterQuery.append(" OR ");
        filterQuery.append(field).append(':').append(sites.get(i));
    }
    if (grouped) filterQuery.append(')');
    return filterQuery.toString();
}
}

View File

@ -503,6 +503,11 @@ public final class QueryParams {
//params.setSortField(CollectionSchema.last_modified.getSolrFieldName(), ORDER.desc); // deprecated in Solr 4.2
}
}
if (this.modifier.collection != null && this.modifier.collection.length() > 0) {
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
}
if (fq.length() > 0) {
params.setFilterQueries(fq.substring(5));
}
@ -570,15 +575,16 @@ public final class QueryParams {
context.append(Base64Order.enhancedCoder.encodeString(this.prefer.toString())).append(asterisk);
context.append(Base64Order.enhancedCoder.encodeString(this.urlMask.toString())).append(asterisk);
context.append(this.modifier.sitehash).append(asterisk);
context.append(this.siteexcludes).append(asterisk);
context.append(this.modifier.author).append(asterisk);
context.append(this.modifier.protocol).append(asterisk);
context.append(this.modifier.filetype).append(asterisk);
context.append(this.modifier.collection).append(asterisk);
context.append(this.modifier.toString()).append(asterisk);
context.append(this.siteexcludes).append(asterisk);
context.append(this.targetlang).append(asterisk);
context.append(this.constraint).append(asterisk);
context.append(this.maxDistance).append(asterisk);
context.append(this.modifier.toString()).append(asterisk);
context.append(this.modifier.protocol).append(asterisk);
context.append(this.tld).append(asterisk);
context.append(this.modifier.filetype).append(asterisk);
context.append(this.inlink).append(asterisk);
context.append(this.lat).append(asterisk).append(this.lon).append(asterisk).append(this.radius).append(asterisk);
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());