mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merge branch 'master' of ssh://gitorious.org/yacy/rc1
This commit is contained in:
commit
8e301cd917
|
@ -762,6 +762,9 @@ search.navigation=hosts,authors,namespace,topics,filetype,protocol
|
|||
all search results are valid without verification
|
||||
search.verify = iffresh
|
||||
|
||||
search.excludehosts=
|
||||
search.excludehosth=
|
||||
|
||||
# in case that a link verification fails then the corresponding index reference can be
|
||||
# deleted to clean up the index. If this property is set then failed index verification in
|
||||
# the cases of nocache, iffresh and ifexist causes an index deletion
|
||||
|
|
|
@ -109,12 +109,18 @@
|
|||
</select>
|
||||
</dd>
|
||||
|
||||
<dt>Exclude Hosts</dt>
|
||||
<dd>List of hosts that shall be excluded from search results by default but can be included using the site:&lt;host&gt; operator:<br/>
|
||||
<input type="text" name="search.excludehosts" value="#[search.excludehosts]#" size="60" /><br/>
|
||||
#[search.excludehosth]#
|
||||
</dd>
|
||||
|
||||
<dt>'About' Column<br/>(shown in a column alongside<br/>with the search result page)</dt>
|
||||
<dd><input type="text" name="about.headline" value="#[about.headline]#" size="60" />(Headline)<br/>
|
||||
<textarea name="about.body" cols="60" rows="8">#[about.body]#</textarea>(Content)
|
||||
</dd>
|
||||
|
||||
<dt> </dt>
|
||||
<dt> </dt>
|
||||
<dd>
|
||||
<input type="submit" name="searchpage_set" value="Change Search Page" />
|
||||
<input type="submit" name="searchpage_default" value="Set to Default Values" />
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import de.anomic.data.WorkTables;
|
||||
|
@ -93,6 +94,10 @@ public class ConfigPortal {
|
|||
sb.setConfig("about.headline", post.get("about.headline", ""));
|
||||
sb.setConfig("about.body", post.get("about.body", ""));
|
||||
|
||||
String excludehosts = post.get("search.excludehosts", "");
|
||||
sb.setConfig("search.excludehosts", excludehosts);
|
||||
sb.setConfig("search.excludehosth", DigestURI.hosthashes(excludehosts));
|
||||
|
||||
// construct navigation String
|
||||
String nav = "";
|
||||
if (post.getBoolean("search.navigation.hosts", false)) nav += "hosts,";
|
||||
|
@ -126,8 +131,10 @@ public class ConfigPortal {
|
|||
sb.setConfig("search.result.show.pictures", false);
|
||||
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, "iffresh");
|
||||
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, "true");
|
||||
prop.put("about.headline", "");
|
||||
prop.put("about.body", "");
|
||||
sb.setConfig("about.headline", "");
|
||||
sb.setConfig("about.body", "");
|
||||
sb.setConfig("search.excludehosts", "");
|
||||
sb.setConfig("search.excludehosth", "");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,6 +174,9 @@ public class ConfigPortal {
|
|||
prop.put("about.headline", sb.getConfig("about.headline", ""));
|
||||
prop.put("about.body", sb.getConfig("about.body", ""));
|
||||
|
||||
prop.put("search.excludehosts", sb.getConfig("search.excludehosts", ""));
|
||||
prop.put("search.excludehosth", sb.getConfig("search.excludehosth", ""));
|
||||
|
||||
final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html");
|
||||
prop.put("popupFront", 0);
|
||||
prop.put("popupSearch", 0);
|
||||
|
|
|
@ -244,6 +244,7 @@ public final class search {
|
|||
null,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
authorhash,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
|
@ -305,6 +306,7 @@ public final class search {
|
|||
constraint,
|
||||
false,
|
||||
sitehash,
|
||||
null,
|
||||
authorhash,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
|
|
|
@ -618,6 +618,7 @@ public class yacysearch {
|
|||
constraint,
|
||||
true,
|
||||
sitehash,
|
||||
DigestURI.hosthashess(sb.getConfig("search.excludehosth", "")),
|
||||
authorhash,
|
||||
DigestURI.TLD_any_zone_filter,
|
||||
client,
|
||||
|
|
|
@ -30,6 +30,8 @@ package net.yacy.kelondro.data.meta;
|
|||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import net.yacy.cora.document.ASCII;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
|
@ -71,6 +73,37 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
|
|||
}
|
||||
return (url == null) ? null : ASCII.String(url.hash(), 6, 6);
|
||||
}
|
||||
|
||||
/**
|
||||
* from a given list of hosts make a list of host hashes
|
||||
* the list is separated by comma
|
||||
* @param hostlist
|
||||
* @return list of host hashes without separation
|
||||
*/
|
||||
public static String hosthashes(final String hostlist) {
|
||||
String[] hs = hostlist.split(",");
|
||||
StringBuilder sb = new StringBuilder(hostlist.length());
|
||||
for (String h: hs) {
|
||||
if (h == null) continue;
|
||||
h = h.trim();
|
||||
if (h.length() == 0) continue;
|
||||
h = hosthash(h);
|
||||
if (h == null || h.length() != 6) continue;
|
||||
sb.append(h);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static Set<String> hosthashess(String hosthashes) {
|
||||
if (hosthashes == null || hosthashes.length() == 0) return null;
|
||||
HashSet<String> h = new HashSet<String>();
|
||||
assert hosthashes.length() % 6 == 0;
|
||||
for (int i = 0; i < hosthashes.length(); i = i + 6) {
|
||||
h.add(hosthashes.substring(i, i + 6));
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* DigestURI from File
|
||||
|
|
|
@ -29,8 +29,10 @@ package net.yacy.search.query;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
|
@ -124,6 +126,7 @@ public final class QueryParams {
|
|||
private final Segment indexSegment;
|
||||
public final String host; // this is the client host that starts the query, not a site operator
|
||||
public final String sitehash; // this is a domain hash, 6 bytes long or null
|
||||
public final Set<String> siteexcludes; // set of domain hashes that are excluded if not included by sitehash
|
||||
public final String authorhash;
|
||||
public final String tenant;
|
||||
public final Modifier modifier;
|
||||
|
@ -182,6 +185,7 @@ public final class QueryParams {
|
|||
this.snippetCacheStrategy = null;
|
||||
this.host = null;
|
||||
this.sitehash = null;
|
||||
this.siteexcludes = null;
|
||||
this.authorhash = null;
|
||||
this.remotepeer = null;
|
||||
this.time = Long.valueOf(System.currentTimeMillis());
|
||||
|
@ -208,6 +212,7 @@ public final class QueryParams {
|
|||
final Searchdom domType, final int domMaxTargets,
|
||||
final Bitfield constraint, final boolean allofconstraint,
|
||||
final String site,
|
||||
final Set<String> siteexcludes,
|
||||
final String authorhash,
|
||||
final int domainzone,
|
||||
final String host,
|
||||
|
@ -250,6 +255,7 @@ public final class QueryParams {
|
|||
this.constraint = constraint;
|
||||
this.allofconstraint = allofconstraint;
|
||||
this.sitehash = site; assert site == null || site.length() == 6;
|
||||
this.siteexcludes = siteexcludes != null && siteexcludes.size() == 0 ? null: siteexcludes;
|
||||
this.authorhash = authorhash; assert authorhash == null || !authorhash.isEmpty();
|
||||
this.snippetCacheStrategy = snippetCacheStrategy;
|
||||
this.host = host;
|
||||
|
@ -491,6 +497,8 @@ public final class QueryParams {
|
|||
context.append(asterisk);
|
||||
context.append(this.sitehash);
|
||||
context.append(asterisk);
|
||||
context.append(this.siteexcludes);
|
||||
context.append(asterisk);
|
||||
context.append(this.authorhash);
|
||||
context.append(asterisk);
|
||||
context.append(this.targetlang);
|
||||
|
|
|
@ -311,6 +311,9 @@ public final class RWIProcess extends Thread
|
|||
// check site constraints
|
||||
final String hosthash = iEntry.hosthash();
|
||||
if ( this.query.sitehash == null ) {
|
||||
if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) {
|
||||
continue pollloop;
|
||||
}
|
||||
// no site constraint there; maybe collect host navigation information
|
||||
if ( nav_hosts && this.query.urlMask_isCatchall ) {
|
||||
this.hostNavigator.inc(hosthash);
|
||||
|
|
Loading…
Reference in New Issue
Block a user