mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added query modifier 'on'. This makes it possible to search for dates
that occur within the content of (web) page documents (not the document's last-modified date!). This works only if the Solr field dates_in_content_sxt is enabled. A search request may then have the form "term on:<date>", for example: "gift on:24.12.2014", "gift on:2014/12/24" or "* on:2014/12/31". For the date format you may use any kind of human-readable date representation (yes, really!) - the on:<date> parser tries to identify the language and also knows event names, for example: "bunny on:eastern" - as long as the date term contains no spaces (use a dot instead). A further enhancement will be made to also accept strings enclosed in quotes.
This commit is contained in:
parent
1cfddea578
commit
65125439fe
|
@ -35,6 +35,8 @@ import java.util.TreeMap;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import net.yacy.cora.date.GenericFormatter;
|
||||
|
||||
/**
|
||||
* The purpose of this class exceeds the demands on simple date parsing using a SimpleDateFormat
|
||||
* because it tries to
|
||||
|
@ -494,6 +496,20 @@ public class DateDetection {
|
|||
return dates;
|
||||
}
|
||||
|
||||
public static Date parseLine(String text) {
|
||||
Date d = null;
|
||||
try {d = CONFORM.parse(text);} catch (ParseException e) {}
|
||||
if (d == null) try {d = GenericFormatter.FORMAT_SHORT_DAY.parse(text);} catch (ParseException e) {}
|
||||
if (d == null) try {d = GenericFormatter.FORMAT_RFC1123_SHORT.parse(text);} catch (ParseException e) {}
|
||||
if (d == null) try {d = GenericFormatter.FORMAT_ANSIC.parse(text);} catch (ParseException e) {}
|
||||
|
||||
if (d == null) {
|
||||
Set<Date> dd = parse(text);
|
||||
if (dd.size() >= 1) d = dd.iterator().next();
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
private static LinkedHashSet<Date> parseRawDate(String text) {
|
||||
// get parse alternatives for different date styles; we consider that one document uses only one style
|
||||
LinkedHashSet<Date> DMYDates = EndianStyle.DMY.parse(text);
|
||||
|
|
|
@ -22,6 +22,7 @@ package net.yacy.search.query;
|
|||
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
|
@ -29,6 +30,7 @@ import org.apache.solr.common.params.MultiMapSolrParams;
|
|||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.util.CommonPattern;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.document.DateDetection;
|
||||
import net.yacy.kelondro.util.ISO639;
|
||||
import net.yacy.search.schema.CollectionSchema;
|
||||
import net.yacy.server.serverObjects;
|
||||
|
@ -37,7 +39,7 @@ import net.yacy.server.serverObjects;
|
|||
public class QueryModifier {
|
||||
|
||||
private final StringBuilder modifier;
|
||||
public String sitehost, sitehash, filetype, protocol, language, author, collection;
|
||||
public String sitehost, sitehash, filetype, protocol, language, author, collection, on;
|
||||
|
||||
public QueryModifier() {
|
||||
this.sitehash = null;
|
||||
|
@ -47,6 +49,7 @@ public class QueryModifier {
|
|||
this.language = null;
|
||||
this.author = null;
|
||||
this.collection = null;
|
||||
this.on = null;
|
||||
this.modifier = new StringBuilder(20);
|
||||
}
|
||||
|
||||
|
@ -142,6 +145,18 @@ public class QueryModifier {
|
|||
querystring = querystring.replace("collection:" + this.collection, "");
|
||||
add("collection:" + this.collection);
|
||||
}
|
||||
|
||||
// parse on-date
|
||||
final int oni = querystring.indexOf("on:", 0);
|
||||
if ( oni >= 0 ) {
|
||||
int ftb = querystring.indexOf(' ', oni);
|
||||
if ( ftb == -1 ) {
|
||||
ftb = querystring.length();
|
||||
}
|
||||
this.on = querystring.substring(oni + 3, ftb);
|
||||
querystring = querystring.replace("on:" + this.on, "");
|
||||
add("on:" + this.on);
|
||||
}
|
||||
|
||||
// parse language
|
||||
final int langi = querystring.indexOf("/language/");
|
||||
|
@ -240,6 +255,10 @@ public class QueryModifier {
|
|||
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.collection));
|
||||
}
|
||||
|
||||
if (this.on != null && this.on.length() > 0 && fq.indexOf(CollectionSchema.dates_in_content_sxt.getSolrFieldName()) < 0) {
|
||||
fq.append(" AND ").append(QueryModifier.parseOnExpression(this.on));
|
||||
}
|
||||
|
||||
if (this.protocol != null && this.protocol.length() > 0 && fq.indexOf(CollectionSchema.url_protocol_s.getSolrFieldName()) < 0) {
|
||||
fq.append(" AND ").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(":\"").append(this.protocol).append('\"');
|
||||
}
|
||||
|
@ -295,6 +314,15 @@ public class QueryModifier {
|
|||
filterQuery.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
|
||||
}
|
||||
return filterQuery.toString();
|
||||
}
|
||||
|
||||
public static String parseOnExpression(String onDescription) {
|
||||
Date onDate = DateDetection.parseLine(onDescription);
|
||||
StringBuilder filterQuery = new StringBuilder(20);
|
||||
if (onDate != null) {
|
||||
filterQuery.append(CollectionSchema.dates_in_content_sxt.getSolrFieldName()).append(":\"").append(org.apache.solr.schema.TrieDateField.formatExternal(onDate)).append('\"');
|
||||
}
|
||||
return filterQuery.toString();
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -481,6 +481,10 @@ public final class QueryParams {
|
|||
fq.append(" AND ").append(QueryModifier.parseCollectionExpression(this.modifier.collection));
|
||||
}
|
||||
|
||||
if (this.modifier.on != null && this.modifier.on.length() > 0 && this.solrSchema.contains(CollectionSchema.dates_in_content_sxt)) {
|
||||
fq.append(" AND ").append(QueryModifier.parseOnExpression(this.modifier.on));
|
||||
}
|
||||
|
||||
if (this.modifier.protocol != null) {
|
||||
fq.append(" AND {!tag=").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append("}").append(CollectionSchema.url_protocol_s.getSolrFieldName()).append(':').append(this.modifier.protocol);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user