Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

This commit is contained in:
Michael Peter Christen 2014-06-29 22:25:08 +02:00
commit f5b817bac4
4 changed files with 42 additions and 14 deletions

View File

@ -86,6 +86,14 @@ public class docParser extends AbstractParser implements Parser {
if (title.length() == l) break;
l = title.length();
}
// get keywords (for yacy as array)
final String keywords = extractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else {
keywlist = null;
}
Document[] docs;
docs = new Document[]{new Document(
@ -94,9 +102,9 @@ public class docParser extends AbstractParser implements Parser {
"UTF-8",
this,
null,
null,
keywlist,
singleList(title),
"", // TODO: AUTHOR
extractor.getSummaryInformation().getAuthor(), // constructor can handle null
extractor.getDocSummaryInformation().getCompany(), // publisher
null,
null,

View File

@ -78,6 +78,12 @@ public class pptParser extends AbstractParser implements Parser {
if (title.length() == l) break;
l = title.length();
}
// get keywords (for yacy as array)
final String keywords = pptExtractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else keywlist = null;
/*
* create the plasmaParserDocument for the database
@ -89,9 +95,9 @@ public class pptParser extends AbstractParser implements Parser {
"UTF-8",
this,
null,
null,
keywlist,
singleList(title),
"", // TODO: AUTHOR
pptExtractor.getSummaryInformation().getAuthor(), // may be null
pptExtractor.getDocSummaryInformation().getCompany(),
null,
null,

View File

@ -35,11 +35,6 @@ import java.util.Set;
import java.util.SortedSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.common.params.FacetParams;
import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain;
import net.yacy.cora.document.encoding.ASCII;
@ -65,6 +60,10 @@ import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
public final class QueryParams {
@ -227,7 +226,8 @@ public final class QueryParams {
this.solrSchema = indexSegment.fulltext().getDefaultConfiguration();
for (String navkey: search_navigation) {
CollectionSchema f = defaultfacetfields.get(navkey);
if (f != null && solrSchema.contains(f)) this.facetfields.add(f.getSolrFieldName());
// handle special field author_sxt: add it to the facets without the contains check, as author_sxt is not enabled (it is a copyfield)
if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt"))) this.facetfields.add(f.getSolrFieldName());
}
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
@ -358,8 +358,8 @@ public final class QueryParams {
bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10";
}
if (fq.length() > 0) {
String oldfq = params.get("fq");
params.setParam("fq", oldfq == null || oldfq.length() == 0 ? fq : "(" + oldfq + ") AND (" + fq + ")");
String oldfq = params.get(CommonParams.FQ);
params.setParam(CommonParams.FQ, oldfq == null || oldfq.length() == 0 ? fq : "(" + oldfq + ") AND (" + fq + ")");
}
if (bq.length() > 0) params.setParam("bq", bq);
if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
@ -465,8 +465,8 @@ public final class QueryParams {
fq.append(" AND ").append(CollectionSchema.language_s.getSolrFieldName()).append(":\"").append(this.modifier.language).append('\"');
}
// add author facets
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author_sxt)) {
// add author facets (check contains(author) instead, as author_sxt is an omitted copyfield)
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(CollectionSchema.author)) {
fq.append(" AND ").append(CollectionSchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"');
}

View File

@ -911,6 +911,13 @@ public final class SearchEvent {
continue pollloop;
}
}
if (this.query.modifier.author != null) {
if (!this.query.modifier.author.equals(iEntry.dc_creator())) {
if (log.isFine()) log.fine ("dropped Node: author");
continue pollloop;
}
}
// finally extend the double-check and insert result to stack
this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
@ -1098,6 +1105,13 @@ public final class SearchEvent {
continue;
}
// check modifier constraint (author)
if (this.query.modifier.author != null && !page.dc_creator().toLowerCase().contains(this.query.modifier.author.toLowerCase()) /*!this.query.modifier.author.equalsIgnoreCase(page.dc_creator())*/) {
if (log.isFine()) log.fine("dropped RWI: author constraint = " + this.query.modifier.author);
if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet();
continue;
}
// Check for blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page.url())) {
if (log.isFine()) log.fine("dropped RWI: url is blacklisted in url blacklist");