added the vocabulary navigator. It can be very simply tested by

switching on the locale dictionaries.
This commit is contained in:
Michael Peter Christen 2012-01-17 01:53:08 +01:00
parent 37d43e5589
commit 83009d86f7
12 changed files with 280 additions and 40 deletions

View File

@ -30,6 +30,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
@ -44,6 +45,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.document.Autotagging.Metatag;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
@ -234,6 +236,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
new HashSet<Metatag>(),
"", // no navigation
CacheStrategy.CACHEONLY,
count,
@ -296,6 +299,7 @@ public final class search {
prefer,
ContentDomain.contentdomParser(contentdom),
language,
new HashSet<Metatag>(),
"", // no navigation
CacheStrategy.CACHEONLY,
count,

View File

@ -88,6 +88,9 @@ $(function() {
collapsible: true,
header: "h3"
});
#{sidebarVocabulary}#
$("#sidebar#[vocabulary]#").accordion({});
#{/sidebarVocabulary}#
$("#sidebarDomains").accordion({});
$("#sidebarProtocols").accordion({});
$("#sidebarProtocols").accordion('activate', false);

View File

@ -28,6 +28,8 @@
// if the shell's current path is HTROOT
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -44,6 +46,8 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Autotagging.Metatag;
import net.yacy.document.Autotagging.Vocabulary;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.LibraryProvider;
@ -81,8 +85,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
public class yacysearch
{
public class yacysearch {
public static serverObjects respond(
final RequestHeader header,
@ -115,6 +118,15 @@ public class yacysearch
final servletProperties prop = new servletProperties();
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
// Produce vocabulary navigation sidebars: publish the name of every vocabulary
// returned by the autotagging library as "sidebarVocabulary_<index>_vocabulary".
Collection<Vocabulary> vocabularies = LibraryProvider.autotagging.getVocabularies();
int j = 0; // index of the next entry; equals the number of entries after the loop
for (Vocabulary v: vocabularies) {
prop.put("sidebarVocabulary_" + j + "_vocabulary", v.getName());
j++;
}
// total number of entries written above
prop.put("sidebarVocabulary", j);
// get segment
Segment indexSegment = null;
if ( post != null && post.containsKey("segment") ) {
@ -386,11 +398,13 @@ public class yacysearch
urlmask = "smb://.*";
modifier.append("/smb ");
}
if ( querystring.indexOf("/file", 0) >= 0 ) {
querystring = querystring.replace("/file", "");
urlmask = "file://.*";
modifier.append("/file ");
}
if ( querystring.indexOf("/location", 0) >= 0 ) {
querystring = querystring.replace("/location", "");
if ( constraint == null ) {
@ -399,6 +413,7 @@ public class yacysearch
constraint.set(Condenser.flag_cat_haslocation, true);
modifier.append("/location ");
}
final int lrp = querystring.indexOf("/language/", 0);
String language = "";
if ( lrp >= 0 ) {
@ -407,8 +422,9 @@ public class yacysearch
}
querystring = querystring.replace("/language/" + language, "");
language = language.toLowerCase();
modifier.append("/language/").append(language).append(" ");
modifier.append("/language/").append(language).append(' ');
}
final int inurl = querystring.indexOf("inurl:", 0);
if ( inurl >= 0 ) {
int ftb = querystring.indexOf(' ', inurl);
@ -420,8 +436,9 @@ public class yacysearch
if ( !urlstr.isEmpty() ) {
urlmask = urlmask == null ? ".*" + urlstr + ".*" : urlmask + urlstr + ".*";
}
modifier.append("inurl:").append(urlstr).append(" ");
modifier.append("inurl:").append(urlstr).append(' ');
}
final int filetype = querystring.indexOf("filetype:", 0);
if ( filetype >= 0 ) {
int ftb = querystring.indexOf(' ', filetype);
@ -440,8 +457,31 @@ public class yacysearch
urlmask = urlmask + ".*\\." + ft;
}
}
modifier.append("filetype:").append(ft).append(" ");
modifier.append("filetype:").append(ft).append(' ');
}
// Extract every "/vocabulary/<name>/<term>" modifier from the query string.
// Each occurrence is removed from the query, recorded in the modifier string
// and, when it carries both a name and a term, turned into a metatag constraint.
final String vocabularyModifierKey = "/vocabulary/";
int voc = 0;
Collection<Metatag> metatags = new ArrayList<Metatag>(1);
while ((voc = querystring.indexOf(vocabularyModifierKey, 0)) >= 0) {
    String vocabulary = "";
    int ve = querystring.indexOf(' ', voc + vocabularyModifierKey.length());
    if (ve < 0) {
        // the modifier runs to the end of the query
        vocabulary = querystring.substring(voc);
        querystring = querystring.substring(0, voc).trim();
    } else {
        // Keep the leading '/' here as well. The former "voc + 1" start index
        // dropped it, so the prefix removal below cut off the first character
        // of the vocabulary name and the modifier was recorded without its slash.
        vocabulary = querystring.substring(voc, ve);
        querystring = querystring.substring(0, voc) + querystring.substring(ve);
    }
    modifier.append(vocabulary).append(' ');
    // strip the "/vocabulary/" prefix, leaving "<name>/<term>"
    vocabulary = vocabulary.substring(vocabularyModifierKey.length());
    int p = vocabulary.indexOf('/');
    if (p > 0) {
        String k = vocabulary.substring(0, p);
        String v = vocabulary.substring(p + 1);
        metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v));
    }
}
String tenant = null;
if ( post.containsKey("tenant") ) {
tenant = post.get("tenant");
@ -456,6 +496,7 @@ public class yacysearch
}
}
}
final int site = querystring.indexOf("site:", 0);
String sitehash = null;
String sitehost = null;
@ -473,7 +514,7 @@ public class yacysearch
sitehost = sitehost.substring(0, sitehost.length() - 1);
}
sitehash = DigestURI.hosthash(sitehost);
modifier.append("site:").append(sitehost).append(" ");
modifier.append("site:").append(sitehost).append(' ');
}
final int heuristicScroogle = querystring.indexOf("/heuristic/scroogle", 0);
@ -509,10 +550,11 @@ public class yacysearch
}
author = querystring.substring(authori + 7, ftb);
querystring = querystring.replace("author:" + author, "");
modifier.append("author:").append(author).append(" ");
modifier.append("author:").append(author).append(' ');
}
authorhash = ASCII.String(Word.word2hash(author));
}
final int tld = querystring.indexOf("tld:", 0);
if ( tld >= 0 ) {
int ftb = querystring.indexOf(' ', tld);
@ -521,7 +563,7 @@ public class yacysearch
}
String domain = querystring.substring(tld + 4, ftb);
querystring = querystring.replace("tld:" + domain, "");
modifier.append("tld:").append(domain).append(" ");
modifier.append("tld:").append(domain).append(' ');
while ( domain.length() > 0 && domain.charAt(0) == '.' ) {
domain = domain.substring(1);
}
@ -695,6 +737,7 @@ public class yacysearch
prefermask,
contentdom,
language,
metatags,
navigation,
snippetFetchStrategy,
maximumRecords,

View File

@ -61,6 +61,15 @@
</div>
#(/nav-authors)#
#{nav-vocabulary}#
<div id="sidebar#[navname]#" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">#[navname]# Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
</div>
#{/nav-vocabulary}#
#(nav-about)#::
<div id="sidebarAbout" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">#[headline]#</h3>

View File

@ -25,9 +25,11 @@
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.document.Autotagging;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.Formatter;
@ -219,6 +221,77 @@ public class yacysearchtrailer {
prop.put("nav-filetypes_element_" + i + "_nl", 0);
}
// Vocabulary navigators: for every vocabulary that has a non-empty score map,
// write one "nav-vocabulary_<n>_*" group with up to 20 elements.
final Map<String, ScoreMap<String>> vocabularyNavigators = theSearch.getVocabularyNavigators();
if (vocabularyNavigators != null && vocabularyNavigators.size() > 0) {
int navvoccount = 0; // index of the next group that is actually written
vocnav: for (Map.Entry<String, ScoreMap<String>> ve: vocabularyNavigators.entrySet()) {
String navname = ve.getKey();
// vocabularies without any counted term produce no group
if (ve.getValue() == null || ve.getValue().isEmpty()) {
continue vocnav;
}
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_navname", navname);
// NOTE(review): keys(false) presumably iterates from highest to lowest score - confirm against ScoreMap
navigatorIterator = ve.getValue().keys(false);
int i = 0;
String anav;
// limit each navigator to 20 elements
while (i < 20 && navigatorIterator.hasNext()) {
name = navigatorIterator.next();
count = ve.getValue().get(name);
// query modifier that restricts a search to this vocabulary term
anav = "/vocabulary/" + navname + "/" + Autotagging.encodePrintname(name);
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_name", name);
// NOTE(review): name is concatenated into the anchor markup and stored without the fileType argument used for "_name" - confirm whether escaping is needed here
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators).toString() + "\">" + name + " (" + count + ")</a>");
prop.putJSON("nav-vocabulary_" + navvoccount + "_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators).toString());
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_count", count);
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", anav);
// "nl" is set to 1 for every element here and reset to 0 for the last one below
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 1);
i++;
}
prop.put("nav-vocabulary_" + navvoccount + "_element", i);
// step back to the last written element and clear its "nl" flag
// NOTE(review): if the iterator yielded nothing, i becomes -1 here - confirm this cannot happen after the isEmpty() check
i--;
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 0);
navvoccount++;
}
// number of groups written
prop.put("nav-vocabulary", navvoccount);
} else {
// no vocabulary navigators available
prop.put("nav-vocabulary", 0);
}
/*
html
#{nav-vocabulary}#
<div id="sidebar#[navname]#" style="float: right; margin-top:5px; width: 220px;">
<h3 style="padding-left:25px;">#[navname]# Navigator</h3>
<div><ul style="padding-left: 0px;">#{element}#
<li>#[url]#</li>
#{/element}#</ul></div>
</div>
#{/nav-vocabulary}#
xml
#{nav-vocabulary}#
<yacy:facet name="#[navname]#" displayname="#[navname]#" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#{/nav-vocabulary}#
json
#{nav-vocabulary}#
{
"facetname": "#[navname]#",
"displayname": "#[navname]#",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#{/nav-vocabulary}#
*/
// about box
final String aboutBody = env.getConfig("about.body", "");
final String aboutHeadline = env.getConfig("about.headline", "");

View File

@ -63,7 +63,20 @@
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#(/nav-authors)##(nav-topics)#::
},#(/nav-authors)##{nav-vocabulary}#
{
"facetname": "#[navname]#",
"displayname": "#[navname]#",
"type": "String",
"min": "0",
"max": "0",
"mean": "0",
"elements": [
#{element}#
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
#{/element}#
]
},#{/nav-vocabulary}##(nav-topics)#::
{
"facetname": "topics",
"displayname": "Topics",

View File

@ -7,40 +7,47 @@
</yacy:facet>
#(/nav-domains)#
#(nav-namespace)#::
<yacy:facet name="domains" displayname="Namespace" type="String" min="0" max="0" mean="0">
<yacy:facet name="namespace" displayname="Namespace" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-namespace)#
#(nav-authors)#::
<yacy:facet name="domains" displayname="Authors" type="String" min="0" max="0" mean="0">
<yacy:facet name="authors" displayname="Authors" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-authors)#
#(nav-filetype)#::
<yacy:facet name="domains" displayname="Filetypes" type="String" min="0" max="0" mean="0">
<yacy:facet name="filetypes" displayname="Filetypes" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-filetype)#
#(nav-protocol)#::
<yacy:facet name="domains" displayname="Protocols" type="String" min="0" max="0" mean="0">
<yacy:facet name="protocols" displayname="Protocols" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-protocol)#
#(nav-topics)#::
<yacy:facet name="topwords" displayname="Topics" type="String" min="0" max="0" mean="0">
<yacy:facet name="topics" displayname="Topics" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#(/nav-topics)#
#{nav-vocabulary}#
<yacy:facet name="#[navname]#" displayname="#[navname]#" type="String" min="0" max="0" mean="0">
#{element}#
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
#{/element}#
</yacy:facet>
#{/nav-vocabulary}#
</yacy:navigation>
<opensearch:totalResults>#[num-results_totalcount]#</opensearch:totalResults>

View File

@ -24,6 +24,7 @@ import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -43,12 +44,12 @@ import net.yacy.kelondro.util.FileUtils;
*/
public class Autotagging {
final static Object PRESENT = new Object();
private final static Object PRESENT = new Object();
final char prefixChar;
final File autotaggingPath;
final Map<String, Vocabulary> vocabularies;
final Map<String, Object> allTags;
public final char prefixChar;
private final File autotaggingPath;
private final Map<String, Vocabulary> vocabularies;
private final Map<String, Object> allTags;
public Autotagging(final File autotaggingPath, char prefixChar) {
this.vocabularies = new ConcurrentHashMap<String, Vocabulary>();
@ -92,6 +93,10 @@ public class Autotagging {
}
}
/**
 * Gives access to all vocabularies held by this instance.
 *
 * @return the value collection of the internal vocabulary map (not a copy)
 */
public Collection<Vocabulary> getVocabularies() {
    final Collection<Vocabulary> loaded = this.vocabularies.values();
    return loaded;
}
/**
 * Gives access to all tag names known to this instance.
 *
 * @return the key set of the internal tag map (not a copy)
 */
public Set<String> allTags() {
    final Set<String> tagNames = this.allTags.keySet();
    return tagNames;
}
@ -138,7 +143,7 @@ public class Autotagging {
word = normalizeWord(word);
for (Map.Entry<String, Vocabulary> v: this.vocabularies.entrySet()) {
tag = v.getValue().getMetatag(word);
if (tag != null) return tag.getMetatag();
if (tag != null) return tag.toString();
}
return null;
}
@ -178,6 +183,11 @@ public class Autotagging {
continue vocloop;
}
k = line.substring(0, p).trim();
k = k.replaceAll(" \\+", ", "); // remove symbols that are bad in a query attribute
k = k.replaceAll(" /", ", ");
k = k.replaceAll("\\+", ",");
k = k.replaceAll("/", ",");
k = k.replaceAll(" ", " ");
v = line.substring(p + 1);
tags = v.split(",");
tagloop: for (String t: tags) {
@ -238,6 +248,8 @@ public class Autotagging {
private final static Pattern PATTERN_OE = Pattern.compile("\u00F6");
private final static Pattern PATTERN_UE = Pattern.compile("\u00FC");
private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF");
private final static Pattern PATTERN_UL = Pattern.compile("_");
private final static Pattern PATTERN_SP = Pattern.compile(" ");
private static final String normalizeWord(String word) {
word = word.trim().toLowerCase();
@ -255,12 +267,12 @@ public class Autotagging {
this.vocName = vocName;
this.print = print;
}
public Metatag(String metatag) {
public Metatag(String metatag) throws RuntimeException {
assert metatag.charAt(0) == Autotagging.this.prefixChar;
int p = metatag.indexOf(':');
assert p > 0;
if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
this.vocName = metatag.substring(1, p);
this.print = metatag.substring(p + 1);
this.print = decodeMaskname(metatag.substring(p + 1));
}
public String getVocabularyName() {
return this.vocName;
@ -268,19 +280,45 @@ public class Autotagging {
/**
 * @return the print name of this metatag as stored in the object
 */
public String getPrintName() {
    final String printName = this.print;
    return printName;
}
public String getMetatag() {
return Autotagging.this.prefixChar + this.vocName + ":" + this.print.replaceAll(" ", "_");
/**
 * Renders this metatag in its textual form: the prefix character of the
 * enclosing Autotagging instance, the vocabulary name, a colon and the
 * encoded print name.
 *
 * @return the textual metatag
 */
@Override
public String toString() {
    final StringBuilder tag = new StringBuilder();
    tag.append(Autotagging.this.prefixChar);
    tag.append(this.vocName);
    tag.append(":");
    tag.append(encodePrintname(this.print));
    return tag.toString();
}
/**
 * Two metatags are equal when both their vocabulary name and their print
 * name are equal.
 *
 * @param m the object to compare with; may be null or of any type
 * @return true if m is a Metatag with the same vocabulary name and print
 *         name, false otherwise (including for null and foreign types)
 */
@Override
public boolean equals(Object m) {
    if (this == m) {
        return true;
    }
    // guard the cast: Object.equals must return false for null and for
    // foreign types instead of throwing
    if (!(m instanceof Metatag)) {
        return false;
    }
    Metatag m0 = (Metatag) m;
    return this.vocName.equals(m0.vocName) && this.print.equals(m0.print);
}
/**
 * Hash code built from the same two fields that equals compares.
 *
 * @return the sum of the hash codes of vocabulary name and print name
 */
@Override
public int hashCode() {
    final int vocabularyHash = this.vocName.hashCode();
    final int printHash = this.print.hashCode();
    return vocabularyHash + printHash;
}
}
/**
 * Encodes a print name for use inside a metatag by applying PATTERN_SP and
 * replacing every match with an underscore.
 *
 * @param printname the print name to encode
 * @return the encoded name
 */
public static final String encodePrintname(String printname) {
    final String encoded = PATTERN_SP.matcher(printname).replaceAll("_");
    return encoded;
}
/**
 * Decodes the name part of a metatag by applying PATTERN_UL and replacing
 * every match with a space.
 *
 * @param maskname the encoded name
 * @return the decoded print name
 */
public static final String decodeMaskname(String maskname) {
    final String decoded = PATTERN_UL.matcher(maskname).replaceAll(" ");
    return decoded;
}
/**
 * Creates a metatag that belongs to this Autotagging instance.
 *
 * @param vocName the vocabulary name
 * @param print the print name
 * @return a new Metatag for the given vocabulary name and print name
 */
public Metatag metatag(String vocName, String print) {
    final Metatag created = new Metatag(vocName, print);
    return created;
}
public Metatag metatag(String metatag) {
public Metatag metatag(String metatag) throws RuntimeException {
return new Metatag(metatag);
}
/**
 * Checks whether the textual form of a metatag occurs in a list of tags.
 *
 * @param metatag the metatag to look for; compared by its toString() form
 * @param tags the tags to search
 * @return true if at least one element of tags equals the metatag's text
 */
public static boolean metatagAppearIn(final Metatag metatag, final String[] tags) {
    final String wanted = metatag.toString();
    for (int i = 0; i < tags.length; i++) {
        if (wanted.equals(tags[i])) {
            return true;
        }
    }
    return false;
}
public static void main(String[] args) {
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
for (Map.Entry<String, Vocabulary> entry: a.vocabularies.entrySet()) {

View File

@ -28,6 +28,8 @@ package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -42,6 +44,7 @@ import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Autotagging;
import net.yacy.document.Condenser;
import net.yacy.document.parser.html.AbstractScraper;
import net.yacy.document.parser.html.CharacterCoding;
@ -113,6 +116,7 @@ public final class QueryParams {
public final boolean urlMask_isCatchall, prefer_isMatchnothing;
public final ContentDomain contentdom;
public final String targetlang;
public final Collection<Autotagging.Metatag> metatags;
public final String navigators;
public final Searchdom domType;
public final int zonecode;
@ -176,6 +180,7 @@ public final class QueryParams {
this.itemsPerPage = itemsPerPage;
this.offset = 0;
this.targetlang = "en";
this.metatags = new ArrayList<Autotagging.Metatag>(0);
this.domType = Searchdom.LOCAL;
this.zonecode = DigestURI.TLD_any_zone_filter;
this.domMaxTargets = 0;
@ -205,6 +210,7 @@ public final class QueryParams {
final String modifier,
final int maxDistance, final String prefer, final ContentDomain contentdom,
final String language,
final Collection<Autotagging.Metatag> metatags,
final String navigators,
final CacheStrategy snippetCacheStrategy,
final int itemsPerPage, final int offset, final String urlMask,
@ -247,6 +253,7 @@ public final class QueryParams {
this.prefer_isMatchnothing = this.prefer.toString().equals(matchnothing_pattern.toString());
assert language != null;
this.targetlang = language;
this.metatags = metatags;
this.navigators = navigators;
this.domType = domType;
this.zonecode = domainzone;
@ -506,6 +513,8 @@ public final class QueryParams {
context.append(asterisk);
context.append(this.maxDistance);
context.append(asterisk);
context.append(this.modifier.s);
context.append(asterisk);
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());
if (anonymized) {
this.idCacheAnon = context.toString();

View File

@ -46,7 +46,10 @@ import net.yacy.cora.sorting.ConcurrentScoreMap;
import net.yacy.cora.sorting.ScoreMap;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
import net.yacy.document.Autotagging;
import net.yacy.document.Autotagging.Metatag;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
@ -101,6 +104,7 @@ public final class RWIProcess extends Thread
private final ScoreMap<String> namespaceNavigator; // a counter for name spaces
private final ScoreMap<String> protocolNavigator; // a counter for protocol types
private final ScoreMap<String> filetypeNavigator; // a counter for file types
private final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies
public RWIProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final boolean remote) {
// we collect the urlhashes and construct a list with urlEntry objects
@ -132,6 +136,7 @@ public final class RWIProcess extends Thread
this.namespaceNavigator = new ConcurrentScoreMap<String>();
this.protocolNavigator = new ConcurrentScoreMap<String>();
this.filetypeNavigator = new ConcurrentScoreMap<String>();
this.vocabularyNavigator = new ConcurrentHashMap<String, ScoreMap<String>>();
this.ref = new ConcurrentScoreMap<String>();
this.feedersAlive = new AtomicInteger(0);
this.feedersTerminated = new AtomicInteger(0);
@ -349,8 +354,7 @@ public final class RWIProcess extends Thread
this.urlhashes.putUnique(iEntry.urlhash());
rankingtryloop: while ( true ) {
try {
this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order
.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch ( final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation
@ -482,8 +486,7 @@ public final class RWIProcess extends Thread
m = this.doubleDomCache.get(hosthash);
if ( m == null ) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned
m =
new WeakPriorityBlockingQueue<WordReferenceVars>((this.query.specialRights)
m = new WeakPriorityBlockingQueue<WordReferenceVars>((this.query.specialRights)
? maxDoubleDomSpecial
: maxDoubleDomAll);
this.doubleDomCache.put(hosthash, m);
@ -504,8 +507,7 @@ public final class RWIProcess extends Thread
WeakPriorityBlockingQueue.Element<WordReferenceVars> bestEntry = null;
WeakPriorityBlockingQueue.Element<WordReferenceVars> o;
synchronized ( this.doubleDomCache ) {
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i =
this.doubleDomCache.values().iterator();
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
while ( i.hasNext() ) {
try {
m = i.next();
@ -557,10 +559,9 @@ public final class RWIProcess extends Thread
final long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
int p = -1;
long timeleft;
while ( (timeleft = timeout - System.currentTimeMillis()) > 0 ) {
takeloop: while ( (timeleft = timeout - System.currentTimeMillis()) > 0 ) {
//System.out.println("timeleft = " + timeleft);
final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi =
takeRWI(skipDoubleDom, timeleft);
final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
if ( obrwi == null ) {
return null; // all time was already wasted in takeRWI to get another element
}
@ -635,6 +636,20 @@ public final class RWIProcess extends Thread
continue;
}
// Check vocabulary constraint: split the subject field of the page into tags
// and reject the page unless every metatag requested by the query is among them.
final String tags = page.dc_subject();
// a missing or empty subject yields an empty tag list; note that dc_subject() is read a second time for the split
final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
if (this.query.metatags != null && this.query.metatags.size() > 0) {
// all metatags must appear in the tags list
for (Metatag metatag: this.query.metatags) {
if (!Autotagging.metatagAppearIn(metatag, taglist)) {
// count the page as sorted out and continue with the next iteration of the outer takeloop
this.sortout++;
Log.logInfo("RWIProcess", "sorted out " + page.url());
continue takeloop;
}
}
}
// evaluate information of metadata for navigation
// author navigation:
if ( pageauthor != null && pageauthor.length() > 0 ) {
@ -654,6 +669,12 @@ public final class RWIProcess extends Thread
continue;
}
// check Scanner
if ( !Scanner.acceptURL(page.url()) ) {
this.sortout++;
continue;
}
// namespace navigation
String pagepath = page.url().getPath();
if ( (p = pagepath.indexOf(':')) >= 0 ) {
@ -675,10 +696,20 @@ public final class RWIProcess extends Thread
this.filetypeNavigator.inc(fileext);
}
// check Scanner
if ( !Scanner.acceptURL(page.url()) ) {
this.sortout++;
continue;
// Vocabulary navigation: count every metatag found in the tag list of the page,
// grouped by vocabulary name.
tagharvest: for (String tag: taglist) {
// only tags that start with the library's tag prefix are treated as metatags
if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest;
try {
Metatag metatag = LibraryProvider.autotagging.metatag(tag);
ScoreMap<String> voc = this.vocabularyNavigator.get(metatag.getVocabularyName());
// create the counter for a vocabulary when it is seen for the first time
// NOTE(review): get followed by put is not atomic - confirm whether this code can run concurrently
if (voc == null) {
voc = new ConcurrentScoreMap<String>();
this.vocabularyNavigator.put(metatag.getVocabularyName(), voc);
}
voc.inc(metatag.getPrintName());
} catch (RuntimeException e) {
// tag may not be well-formed; such tags are skipped silently
}
}
// accept url
@ -687,6 +718,8 @@ public final class RWIProcess extends Thread
return null;
}
final static Pattern SPACE_PATTERN = Pattern.compile(" ");
public int sizeQueue() {
int c = this.stack.sizeQueue();
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
@ -818,6 +851,10 @@ public final class RWIProcess extends Thread
return this.filetypeNavigator;
}
/**
 * Gives access to the vocabulary navigator counters of this process.
 *
 * @return the internal map of score maps (not a copy)
 */
public Map<String,ScoreMap<String>> getVocabularyNavigators() {
    final Map<String,ScoreMap<String>> navigators = this.vocabularyNavigator;
    return navigators;
}
public static final Comparator<Map.Entry<String, Integer>> mecomp =
new Comparator<Map.Entry<String, Integer>>() {
@Override

View File

@ -472,6 +472,10 @@ public final class SearchEvent
return this.rankingProcess.getFiletypeNavigator();
}
/**
 * Delegates to the ranking process of this search event.
 *
 * @return whatever the ranking process returns as its vocabulary navigators
 */
public Map<String,ScoreMap<String>> getVocabularyNavigators() {
    final Map<String,ScoreMap<String>> navigators = this.rankingProcess.getVocabularyNavigators();
    return navigators;
}
public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
synchronized ( this.heuristics ) {
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));

View File

@ -481,7 +481,7 @@ public class SnippetProcess {
}
// get next entry
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(500, Math.max(100, this.timeout - System.currentTimeMillis())));
//if (page != null) Log.logInfo("ResultFetcher", "got one page: " + page.metadata().url().toNormalform(true, false));
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
if (page == null) {