Added Solr filter queries for audio, video and application domains

Inspired by the existing filter query used for image search, and consistent
with the post-filtering on content domain applied in SearchEvent.addNodes().

These filters are quite simplistic, but at least audio, video and
application searches now return results. Previously, when filtering on
these content domains, many result pages (and often even the first
page) were empty while the total result count suggested that results
should be available. This was because filtering on content domain was
only applied AFTER querying the Solr indexes.
This commit is contained in:
luccioman 2017-09-08 11:16:37 +02:00
parent 5d3ceb31b7
commit 66cb9c4ff9
2 changed files with 85 additions and 8 deletions

View File

@ -33,6 +33,8 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.http.HttpStatus;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.order.NaturalOrder;
@ -345,7 +347,7 @@ public class QueryGoal {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
if (noimages) {
fqs.add("-" + CollectionSchema.content_type.getSolrFieldName() + ":(image/*)");
fqs.add("-" + CollectionSchema.url_file_ext_s.getSolrFieldName() + ":(jpg OR png OR gif)");
@ -379,13 +381,67 @@ public class QueryGoal {
final ArrayList<String> fqs = new ArrayList<>();
// add filter to prevent that results come from failed urls
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200");
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
fqs.add(
CollectionSchema.content_type.getSolrFieldName() + ":(image/*) OR " +
CollectionSchema.images_urlstub_sxt.getSolrFieldName() + AbstractSolrConnector.CATCHALL_DTERM);
return fqs;
}
/**
 * Generate Solr filter queries to receive valid audio content results.
 *
 * This filters out documents whose HTTP status is not 200 (OK) and keeps documents whose MIME type matches the
 * prefix audio/* as well as documents whose audio links count is at least one.
 *
 * @return a new mutable list of Solr filter queries for audio content URLs
 */
public List<String> collectionAudioFilterQuery() {
    final ArrayList<String> fqs = new ArrayList<>();
    // add filter to prevent that results come from failed urls
    fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
    // keep audio documents themselves, or documents with at least one counted audio link
    fqs.add(CollectionSchema.content_type.getSolrFieldName() + ":(audio/*) OR "
            + CollectionSchema.audiolinkscount_i.getSolrFieldName() + ":[1 TO *]");
    return fqs;
}
/**
 * Build the Solr filter queries used to restrict results to valid video content.
 *
 * Two filters are produced: the first one keeps only documents whose HTTP status is 200 (OK), the second one
 * keeps documents whose MIME type matches the prefix video/* or whose video links count is at least one.
 *
 * @return a new mutable list of Solr filter queries for video content URLs
 */
public List<String> collectionVideoFilterQuery() {
    // filter preventing results from failed urls
    final String okStatusFilter = CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK;
    // filter on video documents, or documents with at least one counted video link
    final String videoFilter = CollectionSchema.content_type.getSolrFieldName() + ":(video/*) OR "
            + CollectionSchema.videolinkscount_i.getSolrFieldName() + ":[1 TO *]";

    final ArrayList<String> filters = new ArrayList<>();
    filters.add(okStatusFilter);
    filters.add(videoFilter);
    return filters;
}
/**
 * Build the Solr filter queries used to restrict results to valid application specific content.
 *
 * Two filters are produced: the first one keeps only documents whose HTTP status is 200 (OK), the second one
 * keeps documents whose MIME type matches the prefix application/* or whose application links count is at
 * least one.
 *
 * @return a new mutable list of Solr filter queries for application specific content URLs
 */
public List<String> collectionApplicationFilterQuery() {
    final ArrayList<String> filters = new ArrayList<>();

    // filter preventing results from failed urls
    final StringBuilder okStatusFilter = new StringBuilder();
    okStatusFilter.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":").append(HttpStatus.SC_OK);
    filters.add(okStatusFilter.toString());

    // filter on application documents, or documents with at least one counted application link
    final StringBuilder applicationFilter = new StringBuilder();
    applicationFilter.append(CollectionSchema.content_type.getSolrFieldName()).append(":(application/*) OR ");
    applicationFilter.append(CollectionSchema.applinkscount_i.getSolrFieldName()).append(":[1 TO *]");
    filters.add(applicationFilter.toString());

    return filters;
}
public StringBuilder collectionImageQuery(final QueryModifier modifier) {
final StringBuilder q = new StringBuilder(80);

View File

@ -111,7 +111,6 @@ public final class QueryParams {
/** Shared constraint instance built as Bitfield(4, "AAAAAA"). NOTE(review): presumably "no flag set" - confirm against Bitfield. */
public static final Bitfield empty_constraint = new Bitfield(4, "AAAAAA");
/** Compiled regular expression ".*": matches any input that contains no line terminator (DOTALL is not set). */
public static final Pattern catchall_pattern = Pattern.compile(".*");
/**
 * Compiled from the empty regular expression.
 * NOTE(review): despite the name, an empty pattern fully matches the empty string and find() succeeds on any
 * input - confirm that callers only rely on it for non-empty full matches.
 */
private static final Pattern matchnothing_pattern = Pattern.compile("");
/** Query goal used by this class to obtain the Solr filter queries of the requested content domain. */
private final QueryGoal queryGoal;
/** Public, non-final field. NOTE(review): presumably the number of results per page - confirm with callers. */
public int itemsPerPage;
@ -370,11 +369,33 @@ public final class QueryParams {
}
/**
 * Build the Solr query matching the requested content domain.
 *
 * Image searches are delegated to solrImageQuery(). For every other domain the filter queries are obtained
 * from the query goal (audio, video and application domains have dedicated filters, anything else uses the
 * text filters) and handed over to solrQuery(boolean, List).
 *
 * @param cd the content domain to search; when null the text filters are used
 * @param getFacets when true, facets are requested
 * @param excludeintext_image forwarded to the text filter queries generation; ignored for other domains
 * @return a Solr query instance for the given content domain
 */
public SolrQuery solrQuery(final ContentDomain cd, final boolean getFacets, final boolean excludeintext_image) {
    if (cd == ContentDomain.IMAGE) {
        return solrImageQuery(getFacets);
    }
    final List<String> filterQueries;
    if (cd == null) {
        // a switch on a null enum reference throws NullPointerException:
        // keep the former behavior, where anything but IMAGE was handled as a text search
        filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image);
    } else {
        switch (cd) {
        case AUDIO:
            filterQueries = this.queryGoal.collectionAudioFilterQuery();
            break;
        case VIDEO:
            filterQueries = this.queryGoal.collectionVideoFilterQuery();
            break;
        case APP:
            filterQueries = this.queryGoal.collectionApplicationFilterQuery();
            break;
        default:
            filterQueries = this.queryGoal.collectionTextFilterQuery(excludeintext_image);
            break;
        }
    }
    return solrQuery(getFacets, filterQueries);
}
private SolrQuery solrTextQuery(final boolean getFacets, final boolean excludeintext_image) {
/**
* @param getFacets when true, generate facets for fields given in this.facetfields
* @param filterQueries a mutable list of filter queries, initialized with filters related to content domain. Must not be null.
* @return a Solr query instance ready to use
*/
private SolrQuery solrQuery(final boolean getFacets, final List<String> filterQueries) {
if (this.cachedQuery != null) {
this.cachedQuery.setStart(this.offset);
if (!getFacets) this.cachedQuery.setFacet(false);
@ -382,7 +403,7 @@ public final class QueryParams {
}
// construct query
final SolrQuery params = getBasicParams(getFacets, this.queryGoal.collectionTextFilterQuery(excludeintext_image));
final SolrQuery params = getBasicParams(getFacets, filterQueries);
int rankingProfile = this.ranking.coeff_date == RankingProfile.COEFF_MAX ? 1 : (this.modifier.sitehash != null || this.modifier.sitehost != null) ? 2 : 0;
params.setQuery(this.queryGoal.collectionTextQuery().toString());
Ranking actRanking = indexSegment.fulltext().getDefaultConfiguration().getRanking(rankingProfile); // for a by-date ranking select different ranking profile
@ -438,7 +459,7 @@ public final class QueryParams {
return params;
}
private SolrQuery getBasicParams(boolean getFacets, List<String> fqs) {
private SolrQuery getBasicParams(final boolean getFacets, final List<String> fqs) {
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax");
params.setParam(DisMaxParams.QF, CollectionSchema.text_t.getSolrFieldName() + "^1.0");