Merge branch 'master' of ssh://gitorious.org/yacy/rc1

This commit is contained in:
Michael Christen 2011-12-14 12:58:17 +01:00
commit 8e301cd917
8 changed files with 69 additions and 3 deletions

View File

@ -762,6 +762,9 @@ search.navigation=hosts,authors,namespace,topics,filetype,protocol
all search results are valid without verification
search.verify = iffresh
search.excludehosts=
search.excludehosth=
# in case that a link verification fails then the corresponding index reference can be
# deleted to clean up the index. If this property is set then failed index verification in
# the cases of nocache, iffresh and ifexist causes an index deletion

View File

@ -109,12 +109,18 @@
</select>
</dd>
<dt>Exclude Hosts</dt>
<dd>List of hosts that shall be excluded from search results by default but can be included using the site:&lt;host&gt; operator:<br/>
<input type="text" name="search.excludehosts" value="#[search.excludehosts]#" size="60" /><br/>
#[search.excludehosth]#
</dd>
<dt>'About' Column<br/>(shown in a column alongside<br/>with the search result page)</dt>
<dd><input type="text" name="about.headline" value="#[about.headline]#" size="60" />(Headline)<br/>
<textarea name="about.body" cols="60" rows="8">#[about.body]#</textarea>(Content)
</dd>
<dt>&nbsp;</dt>
<dt>&nbsp;</dt>
<dd>
<input type="submit" name="searchpage_set" value="Change Search Page" />&nbsp;&nbsp;
<input type="submit" name="searchpage_default" value="Set to Default Values" />

View File

@ -26,6 +26,7 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.data.WorkTables;
@ -93,6 +94,10 @@ public class ConfigPortal {
sb.setConfig("about.headline", post.get("about.headline", ""));
sb.setConfig("about.body", post.get("about.body", ""));
String excludehosts = post.get("search.excludehosts", "");
sb.setConfig("search.excludehosts", excludehosts);
sb.setConfig("search.excludehosth", DigestURI.hosthashes(excludehosts));
// construct navigation String
String nav = "";
if (post.getBoolean("search.navigation.hosts", false)) nav += "hosts,";
@ -126,8 +131,10 @@ public class ConfigPortal {
sb.setConfig("search.result.show.pictures", false);
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY, "iffresh");
sb.setConfig(SwitchboardConstants.SEARCH_VERIFY_DELETE, "true");
prop.put("about.headline", "");
prop.put("about.body", "");
sb.setConfig("about.headline", "");
sb.setConfig("about.body", "");
sb.setConfig("search.excludehosts", "");
sb.setConfig("search.excludehosth", "");
}
}
@ -167,6 +174,9 @@ public class ConfigPortal {
prop.put("about.headline", sb.getConfig("about.headline", ""));
prop.put("about.body", sb.getConfig("about.body", ""));
prop.put("search.excludehosts", sb.getConfig("search.excludehosts", ""));
prop.put("search.excludehosth", sb.getConfig("search.excludehosth", ""));
final String browserPopUpPage = sb.getConfig(SwitchboardConstants.BROWSER_POP_UP_PAGE, "ConfigBasic.html");
prop.put("popupFront", 0);
prop.put("popupSearch", 0);

View File

@ -244,6 +244,7 @@ public final class search {
null,
false,
sitehash,
null,
authorhash,
DigestURI.TLD_any_zone_filter,
client,
@ -305,6 +306,7 @@ public final class search {
constraint,
false,
sitehash,
null,
authorhash,
DigestURI.TLD_any_zone_filter,
client,

View File

@ -618,6 +618,7 @@ public class yacysearch {
constraint,
true,
sitehash,
DigestURI.hosthashess(sb.getConfig("search.excludehosth", "")),
authorhash,
DigestURI.TLD_any_zone_filter,
client,

View File

@ -30,6 +30,8 @@ package net.yacy.kelondro.data.meta;
import java.io.File;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Set;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
@ -71,6 +73,37 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
}
return (url == null) ? null : ASCII.String(url.hash(), 6, 6);
}
/**
 * Converts a comma-separated list of host names into a string of host hashes.
 * Each entry is trimmed; empty entries are skipped, as are entries for which
 * {@code hosthash} returns null or a value that is not exactly 6 characters long.
 * @param hostlist comma-separated list of host names; null is treated like an empty list
 * @return the 6-character host hashes concatenated without any separator,
 *         or an empty string if no entry produced a hash
 */
public static String hosthashes(final String hostlist) {
    // guard against a missing value instead of failing with a NullPointerException in split()
    if (hostlist == null || hostlist.length() == 0) return "";
    final StringBuilder hashes = new StringBuilder(hostlist.length());
    for (final String entry : hostlist.split(",")) {
        final String host = entry.trim();
        if (host.length() == 0) continue;
        final String hash = hosthash(host);
        // only well-formed hashes may be appended: the result is later cut into fixed 6-character pieces
        if (hash == null || hash.length() != 6) continue;
        hashes.append(hash);
    }
    return hashes.toString();
}
/**
 * Splits a string of concatenated host hashes into a set of individual hashes.
 * The input is read in consecutive pieces of 6 characters. A trailing piece
 * shorter than 6 characters (malformed input) is ignored rather than causing
 * a StringIndexOutOfBoundsException.
 * @param hosthashes host hashes of 6 characters each, concatenated without separator; may be null
 * @return the set of 6-character host hashes, or null if the input is null or empty;
 *         the set is empty if the input is shorter than one complete hash
 */
public static Set<String> hosthashess(String hosthashes) {
    if (hosthashes == null || hosthashes.length() == 0) return null;
    final Set<String> result = new HashSet<String>();
    // the loop bound only admits complete 6-character chunks, so substring() can never run past the end
    for (int start = 0; start + 6 <= hosthashes.length(); start += 6) {
        result.add(hosthashes.substring(start, start + 6));
    }
    return result;
}
/**
* DigestURI from File

View File

@ -29,8 +29,10 @@ package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
@ -124,6 +126,7 @@ public final class QueryParams {
private final Segment indexSegment;
public final String host; // this is the client host that starts the query, not a site operator
public final String sitehash; // this is a domain hash, 6 bytes long or null
public final Set<String> siteexcludes; // set of domain hashes that are excluded if not included by sitehash
public final String authorhash;
public final String tenant;
public final Modifier modifier;
@ -182,6 +185,7 @@ public final class QueryParams {
this.snippetCacheStrategy = null;
this.host = null;
this.sitehash = null;
this.siteexcludes = null;
this.authorhash = null;
this.remotepeer = null;
this.time = Long.valueOf(System.currentTimeMillis());
@ -208,6 +212,7 @@ public final class QueryParams {
final Searchdom domType, final int domMaxTargets,
final Bitfield constraint, final boolean allofconstraint,
final String site,
final Set<String> siteexcludes,
final String authorhash,
final int domainzone,
final String host,
@ -250,6 +255,7 @@ public final class QueryParams {
this.constraint = constraint;
this.allofconstraint = allofconstraint;
this.sitehash = site; assert site == null || site.length() == 6;
this.siteexcludes = siteexcludes != null && siteexcludes.size() == 0 ? null: siteexcludes;
this.authorhash = authorhash; assert authorhash == null || !authorhash.isEmpty();
this.snippetCacheStrategy = snippetCacheStrategy;
this.host = host;
@ -491,6 +497,8 @@ public final class QueryParams {
context.append(asterisk);
context.append(this.sitehash);
context.append(asterisk);
context.append(this.siteexcludes);
context.append(asterisk);
context.append(this.authorhash);
context.append(asterisk);
context.append(this.targetlang);

View File

@ -311,6 +311,9 @@ public final class RWIProcess extends Thread
// check site constraints
final String hosthash = iEntry.hosthash();
if ( this.query.sitehash == null ) {
if (this.query.siteexcludes != null && this.query.siteexcludes.contains(hosthash)) {
continue pollloop;
}
// no site constraint there; maybe collect host navigation information
if ( nav_hosts && this.query.urlMask_isCatchall ) {
this.hostNavigator.inc(hosthash);