From 3428b6f13bcc0138f3741092017388c4df6496bd Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 7 Sep 2015 02:36:22 +0200 Subject: [PATCH] improve filtering by filetype navigator. The URL filter used for filetype doesn't require ".ext", resulting in too many matches; add a sort-out filter for RWI results. --- source/net/yacy/search/query/QueryParams.java | 2 +- source/net/yacy/search/query/SearchEvent.java | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 74b2697ee..fea69f626 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -205,7 +205,7 @@ public final class QueryParams { String protocolfilter = modifier.protocol == null ? ".*" : modifier.protocol; String defaulthostprefix = modifier.protocol == null ? "www" : modifier.protocol; String hostfilter = modifier.sitehost == null && tld == null ? ".*" : modifier.sitehost == null ? ".*\\." + tld : modifier.sitehost.startsWith(defaulthostprefix + ".") ? "(" + defaulthostprefix + "\\.)?" + modifier.sitehost.substring(4) : "(" + defaulthostprefix + "\\.)?" + modifier.sitehost; - String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*"; + String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*"; // TODO: should be ".ext" but while/comment above suggests not -> add filetype contrain pullOneFilteredFromRWI() String filter = protocolfilter + "..." + hostfilter + "." 
+ filefilter; if (!filter.equals(".*....*..*")) { Pattern r = Pattern.compile("(\\.|(\\.\\*))\\.\\*"); diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index b2df72d37..ab2ab70b1 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -1159,6 +1159,13 @@ public final class SearchEvent { continue; } + // check modifier constraint filetype (using fileextension) + if (this.query.modifier.filetype != null && !this.query.modifier.filetype.equals(ext)) { + if (log.isFine()) log.fine("dropped RWI: file type constraint = " + this.query.modifier.filetype); + if (page.word().local()) this.local_rwi_available.decrementAndGet(); else this.remote_rwi_available.decrementAndGet(); + continue; + } + // check modifier constraint (language) // TODO: : page.language() never null but defaults to "en" (may cause false drop of result) if (this.query.modifier.language != null && !this.query.modifier.language.equals(page.language())) {