mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added the vocabulary navigator. It can be very simply tested by
switching on the locale dictionaries.
This commit is contained in:
parent
37d43e5589
commit
83009d86f7
|
@ -30,6 +30,7 @@
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
@ -44,6 +45,7 @@ import net.yacy.cora.protocol.RequestHeader;
|
|||
import net.yacy.cora.services.federated.yacy.CacheStrategy;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.document.Autotagging.Metatag;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.data.word.WordReference;
|
||||
import net.yacy.kelondro.data.word.WordReferenceFactory;
|
||||
|
@ -234,6 +236,7 @@ public final class search {
|
|||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Metatag>(),
|
||||
"", // no navigation
|
||||
CacheStrategy.CACHEONLY,
|
||||
count,
|
||||
|
@ -296,6 +299,7 @@ public final class search {
|
|||
prefer,
|
||||
ContentDomain.contentdomParser(contentdom),
|
||||
language,
|
||||
new HashSet<Metatag>(),
|
||||
"", // no navigation
|
||||
CacheStrategy.CACHEONLY,
|
||||
count,
|
||||
|
|
|
@ -88,6 +88,9 @@ $(function() {
|
|||
collapsible: true,
|
||||
header: "h3"
|
||||
});
|
||||
#{sidebarVocabulary}#
|
||||
$("#sidebar#[vocabulary]#").accordion({});
|
||||
#{/sidebarVocabulary}#
|
||||
$("#sidebarDomains").accordion({});
|
||||
$("#sidebarProtocols").accordion({});
|
||||
$("#sidebarProtocols").accordion('activate', false);
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
// if the shell's current path is HTROOT
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
@ -44,6 +46,8 @@ import net.yacy.cora.protocol.HeaderFramework;
|
|||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.ResponseHeader;
|
||||
import net.yacy.cora.services.federated.yacy.CacheStrategy;
|
||||
import net.yacy.document.Autotagging.Metatag;
|
||||
import net.yacy.document.Autotagging.Vocabulary;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.Document;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
|
@ -81,8 +85,7 @@ import de.anomic.server.serverObjects;
|
|||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.servletProperties;
|
||||
|
||||
public class yacysearch
|
||||
{
|
||||
public class yacysearch {
|
||||
|
||||
public static serverObjects respond(
|
||||
final RequestHeader header,
|
||||
|
@ -115,6 +118,15 @@ public class yacysearch
|
|||
final servletProperties prop = new servletProperties();
|
||||
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
|
||||
|
||||
// produce vocabulary navigation sidebars
|
||||
Collection<Vocabulary> vocabularies = LibraryProvider.autotagging.getVocabularies();
|
||||
int j = 0;
|
||||
for (Vocabulary v: vocabularies) {
|
||||
prop.put("sidebarVocabulary_" + j + "_vocabulary", v.getName());
|
||||
j++;
|
||||
}
|
||||
prop.put("sidebarVocabulary", j);
|
||||
|
||||
// get segment
|
||||
Segment indexSegment = null;
|
||||
if ( post != null && post.containsKey("segment") ) {
|
||||
|
@ -386,11 +398,13 @@ public class yacysearch
|
|||
urlmask = "smb://.*";
|
||||
modifier.append("/smb ");
|
||||
}
|
||||
|
||||
if ( querystring.indexOf("/file", 0) >= 0 ) {
|
||||
querystring = querystring.replace("/file", "");
|
||||
urlmask = "file://.*";
|
||||
modifier.append("/file ");
|
||||
}
|
||||
|
||||
if ( querystring.indexOf("/location", 0) >= 0 ) {
|
||||
querystring = querystring.replace("/location", "");
|
||||
if ( constraint == null ) {
|
||||
|
@ -399,6 +413,7 @@ public class yacysearch
|
|||
constraint.set(Condenser.flag_cat_haslocation, true);
|
||||
modifier.append("/location ");
|
||||
}
|
||||
|
||||
final int lrp = querystring.indexOf("/language/", 0);
|
||||
String language = "";
|
||||
if ( lrp >= 0 ) {
|
||||
|
@ -407,8 +422,9 @@ public class yacysearch
|
|||
}
|
||||
querystring = querystring.replace("/language/" + language, "");
|
||||
language = language.toLowerCase();
|
||||
modifier.append("/language/").append(language).append(" ");
|
||||
modifier.append("/language/").append(language).append(' ');
|
||||
}
|
||||
|
||||
final int inurl = querystring.indexOf("inurl:", 0);
|
||||
if ( inurl >= 0 ) {
|
||||
int ftb = querystring.indexOf(' ', inurl);
|
||||
|
@ -420,8 +436,9 @@ public class yacysearch
|
|||
if ( !urlstr.isEmpty() ) {
|
||||
urlmask = urlmask == null ? ".*" + urlstr + ".*" : urlmask + urlstr + ".*";
|
||||
}
|
||||
modifier.append("inurl:").append(urlstr).append(" ");
|
||||
modifier.append("inurl:").append(urlstr).append(' ');
|
||||
}
|
||||
|
||||
final int filetype = querystring.indexOf("filetype:", 0);
|
||||
if ( filetype >= 0 ) {
|
||||
int ftb = querystring.indexOf(' ', filetype);
|
||||
|
@ -440,8 +457,31 @@ public class yacysearch
|
|||
urlmask = urlmask + ".*\\." + ft;
|
||||
}
|
||||
}
|
||||
modifier.append("filetype:").append(ft).append(" ");
|
||||
modifier.append("filetype:").append(ft).append(' ');
|
||||
}
|
||||
|
||||
// Extract all "/vocabulary/<name>/<term>" modifiers from the query string.
// Each occurrence is removed from the query string, echoed into the modifier
// buffer and converted into a metatag constraint for the search.
final String vocabularyPrefix = "/vocabulary/";
int voc = 0;
Collection<Metatag> metatags = new ArrayList<Metatag>(1);
while ((voc = querystring.indexOf(vocabularyPrefix, 0)) >= 0) {
    String vocabulary = "";
    int ve = querystring.indexOf(' ', voc + vocabularyPrefix.length());
    if (ve < 0) {
        // the modifier reaches to the end of the query string
        vocabulary = querystring.substring(voc);
        querystring = querystring.substring(0, voc).trim();
    } else {
        // Keep the leading '/' here as well: both branches must yield the full
        // "/vocabulary/..." token, otherwise stripping the prefix below would
        // cut off the first character of the vocabulary name.
        vocabulary = querystring.substring(voc, ve);
        querystring = querystring.substring(0, voc) + querystring.substring(ve);
    }
    modifier.append(vocabulary).append(' ');
    // strip the "/vocabulary/" prefix; what remains is "<name>/<term>"
    vocabulary = vocabulary.substring(vocabularyPrefix.length());
    int p = vocabulary.indexOf('/');
    if (p > 0) {
        String k = vocabulary.substring(0, p);
        String v = vocabulary.substring(p + 1);
        metatags.add(LibraryProvider.autotagging.metatag(LibraryProvider.autotagging.prefixChar + k + ":" + v));
    }
}
|
||||
|
||||
String tenant = null;
|
||||
if ( post.containsKey("tenant") ) {
|
||||
tenant = post.get("tenant");
|
||||
|
@ -456,6 +496,7 @@ public class yacysearch
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
final int site = querystring.indexOf("site:", 0);
|
||||
String sitehash = null;
|
||||
String sitehost = null;
|
||||
|
@ -473,7 +514,7 @@ public class yacysearch
|
|||
sitehost = sitehost.substring(0, sitehost.length() - 1);
|
||||
}
|
||||
sitehash = DigestURI.hosthash(sitehost);
|
||||
modifier.append("site:").append(sitehost).append(" ");
|
||||
modifier.append("site:").append(sitehost).append(' ');
|
||||
}
|
||||
|
||||
final int heuristicScroogle = querystring.indexOf("/heuristic/scroogle", 0);
|
||||
|
@ -509,10 +550,11 @@ public class yacysearch
|
|||
}
|
||||
author = querystring.substring(authori + 7, ftb);
|
||||
querystring = querystring.replace("author:" + author, "");
|
||||
modifier.append("author:").append(author).append(" ");
|
||||
modifier.append("author:").append(author).append(' ');
|
||||
}
|
||||
authorhash = ASCII.String(Word.word2hash(author));
|
||||
}
|
||||
|
||||
final int tld = querystring.indexOf("tld:", 0);
|
||||
if ( tld >= 0 ) {
|
||||
int ftb = querystring.indexOf(' ', tld);
|
||||
|
@ -521,7 +563,7 @@ public class yacysearch
|
|||
}
|
||||
String domain = querystring.substring(tld + 4, ftb);
|
||||
querystring = querystring.replace("tld:" + domain, "");
|
||||
modifier.append("tld:").append(domain).append(" ");
|
||||
modifier.append("tld:").append(domain).append(' ');
|
||||
while ( domain.length() > 0 && domain.charAt(0) == '.' ) {
|
||||
domain = domain.substring(1);
|
||||
}
|
||||
|
@ -695,6 +737,7 @@ public class yacysearch
|
|||
prefermask,
|
||||
contentdom,
|
||||
language,
|
||||
metatags,
|
||||
navigation,
|
||||
snippetFetchStrategy,
|
||||
maximumRecords,
|
||||
|
|
|
@ -61,6 +61,15 @@
|
|||
</div>
|
||||
#(/nav-authors)#
|
||||
|
||||
#{nav-vocabulary}#
|
||||
<div id="sidebar#[navname]#" style="float: right; margin-top:5px; width: 220px;">
|
||||
<h3 style="padding-left:25px;">#[navname]# Navigator</h3>
|
||||
<div><ul style="padding-left: 0px;">#{element}#
|
||||
<li>#[url]#</li>
|
||||
#{/element}#</ul></div>
|
||||
</div>
|
||||
#{/nav-vocabulary}#
|
||||
|
||||
#(nav-about)#::
|
||||
<div id="sidebarAbout" style="float: right; margin-top:5px; width: 220px;">
|
||||
<h3 style="padding-left:25px;">#[headline]#</h3>
|
||||
|
|
|
@ -25,9 +25,11 @@
|
|||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.document.Autotagging;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
import net.yacy.kelondro.util.EventTracker;
|
||||
import net.yacy.kelondro.util.Formatter;
|
||||
|
@ -219,6 +221,77 @@ public class yacysearchtrailer {
|
|||
prop.put("nav-filetypes_element_" + i + "_nl", 0);
|
||||
}
|
||||
|
||||
// vocabulary navigators
|
||||
final Map<String, ScoreMap<String>> vocabularyNavigators = theSearch.getVocabularyNavigators();
|
||||
if (vocabularyNavigators != null && vocabularyNavigators.size() > 0) {
|
||||
int navvoccount = 0;
|
||||
vocnav: for (Map.Entry<String, ScoreMap<String>> ve: vocabularyNavigators.entrySet()) {
|
||||
String navname = ve.getKey();
|
||||
if (ve.getValue() == null || ve.getValue().isEmpty()) {
|
||||
continue vocnav;
|
||||
}
|
||||
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_navname", navname);
|
||||
navigatorIterator = ve.getValue().keys(false);
|
||||
int i = 0;
|
||||
String anav;
|
||||
while (i < 20 && navigatorIterator.hasNext()) {
|
||||
name = navigatorIterator.next();
|
||||
count = ve.getValue().get(name);
|
||||
anav = "/vocabulary/" + navname + "/" + Autotagging.encodePrintname(name);
|
||||
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_name", name);
|
||||
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_url", "<a href=\"" + QueryParams.navurl("html", 0, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators).toString() + "\">" + name + " (" + count + ")</a>");
|
||||
prop.putJSON("nav-vocabulary_" + navvoccount + "_element_" + i + "_url-json", QueryParams.navurl("json", 0, theQuery, theQuery.queryStringForUrl() + "+" + anav, theQuery.urlMask.toString(), theQuery.navigators).toString());
|
||||
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_count", count);
|
||||
prop.put(fileType, "nav-vocabulary_" + navvoccount + "_element_" + i + "_modifier", anav);
|
||||
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 1);
|
||||
i++;
|
||||
}
|
||||
prop.put("nav-vocabulary_" + navvoccount + "_element", i);
|
||||
i--;
|
||||
prop.put("nav-vocabulary_" + navvoccount + "_element_" + i + "_nl", 0);
|
||||
navvoccount++;
|
||||
}
|
||||
prop.put("nav-vocabulary", navvoccount);
|
||||
} else {
|
||||
prop.put("nav-vocabulary", 0);
|
||||
}
|
||||
/*
|
||||
html
|
||||
#{nav-vocabulary}#
|
||||
<div id="sidebar#[navname]#" style="float: right; margin-top:5px; width: 220px;">
|
||||
<h3 style="padding-left:25px;">#[navname]# Navigator</h3>
|
||||
<div><ul style="padding-left: 0px;">#{element}#
|
||||
<li>#[url]#</li>
|
||||
#{/element}#</ul></div>
|
||||
</div>
|
||||
#{/nav-vocabulary}#
|
||||
|
||||
xml
|
||||
#{nav-vocabulary}#
|
||||
<yacy:facet name="#[navname]#" displayname="#[navname]#" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#{/nav-vocabulary}#
|
||||
|
||||
json
|
||||
#{nav-vocabulary}#
|
||||
{
|
||||
"facetname": "#[navname]#",
|
||||
"displayname": "#[navname]#",
|
||||
"type": "String",
|
||||
"min": "0",
|
||||
"max": "0",
|
||||
"mean": "0",
|
||||
"elements": [
|
||||
#{element}#
|
||||
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
|
||||
#{/element}#
|
||||
]
|
||||
},#{/nav-vocabulary}#
|
||||
*/
|
||||
|
||||
// about box
|
||||
final String aboutBody = env.getConfig("about.body", "");
|
||||
final String aboutHeadline = env.getConfig("about.headline", "");
|
||||
|
|
|
@ -63,7 +63,20 @@
|
|||
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
|
||||
#{/element}#
|
||||
]
|
||||
},#(/nav-authors)##(nav-topics)#::
|
||||
},#(/nav-authors)##{nav-vocabulary}#
|
||||
{
|
||||
"facetname": "#[navname]#",
|
||||
"displayname": "#[navname]#",
|
||||
"type": "String",
|
||||
"min": "0",
|
||||
"max": "0",
|
||||
"mean": "0",
|
||||
"elements": [
|
||||
#{element}#
|
||||
{"name": "#[name]#", "count": "#[count]#", "modifier": "#[modifier]#", "url": "#[url-json]#"}#(nl)#::,#(/nl)#
|
||||
#{/element}#
|
||||
]
|
||||
},#{/nav-vocabulary}##(nav-topics)#::
|
||||
{
|
||||
"facetname": "topics",
|
||||
"displayname": "Topics",
|
||||
|
|
|
@ -7,40 +7,47 @@
|
|||
</yacy:facet>
|
||||
#(/nav-domains)#
|
||||
#(nav-namespace)#::
|
||||
<yacy:facet name="domains" displayname="Namespace" type="String" min="0" max="0" mean="0">
|
||||
<yacy:facet name="namespace" displayname="Namespace" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#(/nav-namespace)#
|
||||
#(nav-authors)#::
|
||||
<yacy:facet name="domains" displayname="Authors" type="String" min="0" max="0" mean="0">
|
||||
<yacy:facet name="authors" displayname="Authors" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#(/nav-authors)#
|
||||
#(nav-filetype)#::
|
||||
<yacy:facet name="domains" displayname="Filetypes" type="String" min="0" max="0" mean="0">
|
||||
<yacy:facet name="filetypes" displayname="Filetypes" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#(/nav-filetype)#
|
||||
#(nav-protocol)#::
|
||||
<yacy:facet name="domains" displayname="Protocols" type="String" min="0" max="0" mean="0">
|
||||
<yacy:facet name="protocols" displayname="Protocols" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#(/nav-protocol)#
|
||||
#(nav-topics)#::
|
||||
<yacy:facet name="topwords" displayname="Topics" type="String" min="0" max="0" mean="0">
|
||||
<yacy:facet name="topics" displayname="Topics" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#(/nav-topics)#
|
||||
#{nav-vocabulary}#
|
||||
<yacy:facet name="#[navname]#" displayname="#[navname]#" type="String" min="0" max="0" mean="0">
|
||||
#{element}#
|
||||
<yacy:element name="#[name]#" count="#[count]#" modifier="#[modifier]#" />
|
||||
#{/element}#
|
||||
</yacy:facet>
|
||||
#{/nav-vocabulary}#
|
||||
</yacy:navigation>
|
||||
<opensearch:totalResults>#[num-results_totalcount]#</opensearch:totalResults>
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
@ -43,12 +44,12 @@ import net.yacy.kelondro.util.FileUtils;
|
|||
*/
|
||||
public class Autotagging {
|
||||
|
||||
final static Object PRESENT = new Object();
|
||||
private final static Object PRESENT = new Object();
|
||||
|
||||
final char prefixChar;
|
||||
final File autotaggingPath;
|
||||
final Map<String, Vocabulary> vocabularies;
|
||||
final Map<String, Object> allTags;
|
||||
public final char prefixChar;
|
||||
private final File autotaggingPath;
|
||||
private final Map<String, Vocabulary> vocabularies;
|
||||
private final Map<String, Object> allTags;
|
||||
|
||||
public Autotagging(final File autotaggingPath, char prefixChar) {
|
||||
this.vocabularies = new ConcurrentHashMap<String, Vocabulary>();
|
||||
|
@ -92,6 +93,10 @@ public class Autotagging {
|
|||
}
|
||||
}
|
||||
|
||||
public Collection<Vocabulary> getVocabularies() {
|
||||
return this.vocabularies.values();
|
||||
}
|
||||
|
||||
public Set<String> allTags() {
|
||||
return this.allTags.keySet();
|
||||
}
|
||||
|
@ -138,7 +143,7 @@ public class Autotagging {
|
|||
word = normalizeWord(word);
|
||||
for (Map.Entry<String, Vocabulary> v: this.vocabularies.entrySet()) {
|
||||
tag = v.getValue().getMetatag(word);
|
||||
if (tag != null) return tag.getMetatag();
|
||||
if (tag != null) return tag.toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -178,6 +183,11 @@ public class Autotagging {
|
|||
continue vocloop;
|
||||
}
|
||||
k = line.substring(0, p).trim();
|
||||
k = k.replaceAll(" \\+", ", "); // remove symbols that are bad in a query attribute
|
||||
k = k.replaceAll(" /", ", ");
|
||||
k = k.replaceAll("\\+", ",");
|
||||
k = k.replaceAll("/", ",");
|
||||
k = k.replaceAll(" ", " ");
|
||||
v = line.substring(p + 1);
|
||||
tags = v.split(",");
|
||||
tagloop: for (String t: tags) {
|
||||
|
@ -238,6 +248,8 @@ public class Autotagging {
|
|||
private final static Pattern PATTERN_OE = Pattern.compile("\u00F6");
|
||||
private final static Pattern PATTERN_UE = Pattern.compile("\u00FC");
|
||||
private final static Pattern PATTERN_SZ = Pattern.compile("\u00DF");
|
||||
private final static Pattern PATTERN_UL = Pattern.compile("_");
|
||||
private final static Pattern PATTERN_SP = Pattern.compile(" ");
|
||||
|
||||
private static final String normalizeWord(String word) {
|
||||
word = word.trim().toLowerCase();
|
||||
|
@ -255,12 +267,12 @@ public class Autotagging {
|
|||
this.vocName = vocName;
|
||||
this.print = print;
|
||||
}
|
||||
public Metatag(String metatag) {
|
||||
public Metatag(String metatag) throws RuntimeException {
|
||||
assert metatag.charAt(0) == Autotagging.this.prefixChar;
|
||||
int p = metatag.indexOf(':');
|
||||
assert p > 0;
|
||||
if (p < 0) throw new RuntimeException("bad metatag: metatag = " + metatag);
|
||||
this.vocName = metatag.substring(1, p);
|
||||
this.print = metatag.substring(p + 1);
|
||||
this.print = decodeMaskname(metatag.substring(p + 1));
|
||||
}
|
||||
public String getVocabularyName() {
|
||||
return this.vocName;
|
||||
|
@ -268,19 +280,45 @@ public class Autotagging {
|
|||
public String getPrintName() {
|
||||
return this.print;
|
||||
}
|
||||
public String getMetatag() {
|
||||
return Autotagging.this.prefixChar + this.vocName + ":" + this.print.replaceAll(" ", "_");
|
||||
@Override
|
||||
public String toString() {
|
||||
return Autotagging.this.prefixChar + this.vocName + ":" + encodePrintname(this.print);
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object m) {
|
||||
Metatag m0 = (Metatag) m;
|
||||
return this.vocName.equals(m0.vocName) && this.print.equals(m0.print);
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.vocName.hashCode() + this.print.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
public static final String encodePrintname(String printname) {
|
||||
return PATTERN_SP.matcher(printname).replaceAll("_");
|
||||
}
|
||||
|
||||
public static final String decodeMaskname(String maskname) {
|
||||
return PATTERN_UL.matcher(maskname).replaceAll(" ");
|
||||
}
|
||||
|
||||
public Metatag metatag(String vocName, String print) {
|
||||
return new Metatag(vocName, print);
|
||||
}
|
||||
|
||||
public Metatag metatag(String metatag) {
|
||||
public Metatag metatag(String metatag) throws RuntimeException {
|
||||
return new Metatag(metatag);
|
||||
}
|
||||
|
||||
public static boolean metatagAppearIn(final Metatag metatag, final String[] tags) {
|
||||
String tag = metatag.toString();
|
||||
for (String s: tags) {
|
||||
if (tag.equals(s)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
Autotagging a = new Autotagging(new File("DATA/DICTIONARIES/" + LibraryProvider.path_to_autotagging_dictionaries), '$');
|
||||
for (Map.Entry<String, Vocabulary> entry: a.vocabularies.entrySet()) {
|
||||
|
|
|
@ -28,6 +28,8 @@ package net.yacy.search.query;
|
|||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
@ -42,6 +44,7 @@ import net.yacy.cora.document.ASCII;
|
|||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.services.federated.yacy.CacheStrategy;
|
||||
import net.yacy.document.Autotagging;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.parser.html.AbstractScraper;
|
||||
import net.yacy.document.parser.html.CharacterCoding;
|
||||
|
@ -113,6 +116,7 @@ public final class QueryParams {
|
|||
public final boolean urlMask_isCatchall, prefer_isMatchnothing;
|
||||
public final ContentDomain contentdom;
|
||||
public final String targetlang;
|
||||
public final Collection<Autotagging.Metatag> metatags;
|
||||
public final String navigators;
|
||||
public final Searchdom domType;
|
||||
public final int zonecode;
|
||||
|
@ -176,6 +180,7 @@ public final class QueryParams {
|
|||
this.itemsPerPage = itemsPerPage;
|
||||
this.offset = 0;
|
||||
this.targetlang = "en";
|
||||
this.metatags = new ArrayList<Autotagging.Metatag>(0);
|
||||
this.domType = Searchdom.LOCAL;
|
||||
this.zonecode = DigestURI.TLD_any_zone_filter;
|
||||
this.domMaxTargets = 0;
|
||||
|
@ -205,6 +210,7 @@ public final class QueryParams {
|
|||
final String modifier,
|
||||
final int maxDistance, final String prefer, final ContentDomain contentdom,
|
||||
final String language,
|
||||
final Collection<Autotagging.Metatag> metatags,
|
||||
final String navigators,
|
||||
final CacheStrategy snippetCacheStrategy,
|
||||
final int itemsPerPage, final int offset, final String urlMask,
|
||||
|
@ -247,6 +253,7 @@ public final class QueryParams {
|
|||
this.prefer_isMatchnothing = this.prefer.toString().equals(matchnothing_pattern.toString());
|
||||
assert language != null;
|
||||
this.targetlang = language;
|
||||
this.metatags = metatags;
|
||||
this.navigators = navigators;
|
||||
this.domType = domType;
|
||||
this.zonecode = domainzone;
|
||||
|
@ -506,6 +513,8 @@ public final class QueryParams {
|
|||
context.append(asterisk);
|
||||
context.append(this.maxDistance);
|
||||
context.append(asterisk);
|
||||
context.append(this.modifier.s);
|
||||
context.append(asterisk);
|
||||
context.append(this.snippetCacheStrategy == null ? "null" : this.snippetCacheStrategy.name());
|
||||
if (anonymized) {
|
||||
this.idCacheAnon = context.toString();
|
||||
|
|
|
@ -46,7 +46,10 @@ import net.yacy.cora.sorting.ConcurrentScoreMap;
|
|||
import net.yacy.cora.sorting.ScoreMap;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue;
|
||||
import net.yacy.cora.sorting.WeakPriorityBlockingQueue.ReverseElement;
|
||||
import net.yacy.document.Autotagging;
|
||||
import net.yacy.document.Autotagging.Metatag;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||
import net.yacy.kelondro.data.word.Word;
|
||||
|
@ -101,6 +104,7 @@ public final class RWIProcess extends Thread
|
|||
private final ScoreMap<String> namespaceNavigator; // a counter for name spaces
|
||||
private final ScoreMap<String> protocolNavigator; // a counter for protocol types
|
||||
private final ScoreMap<String> filetypeNavigator; // a counter for file types
|
||||
private final Map<String, ScoreMap<String>> vocabularyNavigator; // counters for Vocabularies
|
||||
|
||||
public RWIProcess(final QueryParams query, final ReferenceOrder order, final int maxentries, final boolean remote) {
|
||||
// we collect the urlhashes and construct a list with urlEntry objects
|
||||
|
@ -132,6 +136,7 @@ public final class RWIProcess extends Thread
|
|||
this.namespaceNavigator = new ConcurrentScoreMap<String>();
|
||||
this.protocolNavigator = new ConcurrentScoreMap<String>();
|
||||
this.filetypeNavigator = new ConcurrentScoreMap<String>();
|
||||
this.vocabularyNavigator = new ConcurrentHashMap<String, ScoreMap<String>>();
|
||||
this.ref = new ConcurrentScoreMap<String>();
|
||||
this.feedersAlive = new AtomicInteger(0);
|
||||
this.feedersTerminated = new AtomicInteger(0);
|
||||
|
@ -349,8 +354,7 @@ public final class RWIProcess extends Thread
|
|||
this.urlhashes.putUnique(iEntry.urlhash());
|
||||
rankingtryloop: while ( true ) {
|
||||
try {
|
||||
this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order
|
||||
.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
|
||||
this.stack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
|
||||
break rankingtryloop;
|
||||
} catch ( final ArithmeticException e ) {
|
||||
// this may happen if the concurrent normalizer changes values during cardinal computation
|
||||
|
@ -482,8 +486,7 @@ public final class RWIProcess extends Thread
|
|||
m = this.doubleDomCache.get(hosthash);
|
||||
if ( m == null ) {
|
||||
// first appearance of dom. we create an entry to signal that one of that domain was already returned
|
||||
m =
|
||||
new WeakPriorityBlockingQueue<WordReferenceVars>((this.query.specialRights)
|
||||
m = new WeakPriorityBlockingQueue<WordReferenceVars>((this.query.specialRights)
|
||||
? maxDoubleDomSpecial
|
||||
: maxDoubleDomAll);
|
||||
this.doubleDomCache.put(hosthash, m);
|
||||
|
@ -504,8 +507,7 @@ public final class RWIProcess extends Thread
|
|||
WeakPriorityBlockingQueue.Element<WordReferenceVars> bestEntry = null;
|
||||
WeakPriorityBlockingQueue.Element<WordReferenceVars> o;
|
||||
synchronized ( this.doubleDomCache ) {
|
||||
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i =
|
||||
this.doubleDomCache.values().iterator();
|
||||
final Iterator<WeakPriorityBlockingQueue<WordReferenceVars>> i = this.doubleDomCache.values().iterator();
|
||||
while ( i.hasNext() ) {
|
||||
try {
|
||||
m = i.next();
|
||||
|
@ -557,10 +559,9 @@ public final class RWIProcess extends Thread
|
|||
final long timeout = System.currentTimeMillis() + Math.max(10, waitingtime);
|
||||
int p = -1;
|
||||
long timeleft;
|
||||
while ( (timeleft = timeout - System.currentTimeMillis()) > 0 ) {
|
||||
takeloop: while ( (timeleft = timeout - System.currentTimeMillis()) > 0 ) {
|
||||
//System.out.println("timeleft = " + timeleft);
|
||||
final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi =
|
||||
takeRWI(skipDoubleDom, timeleft);
|
||||
final WeakPriorityBlockingQueue.Element<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
|
||||
if ( obrwi == null ) {
|
||||
return null; // all time was already wasted in takeRWI to get another element
|
||||
}
|
||||
|
@ -635,6 +636,20 @@ public final class RWIProcess extends Thread
|
|||
continue;
|
||||
}
|
||||
|
||||
// check vocabulary constraint
|
||||
final String tags = page.dc_subject();
|
||||
final String[] taglist = tags == null || tags.length() == 0 ? new String[0] : SPACE_PATTERN.split(page.dc_subject());
|
||||
if (this.query.metatags != null && this.query.metatags.size() > 0) {
|
||||
// all metatags must appear in the tags list
|
||||
for (Metatag metatag: this.query.metatags) {
|
||||
if (!Autotagging.metatagAppearIn(metatag, taglist)) {
|
||||
this.sortout++;
|
||||
Log.logInfo("RWIProcess", "sorted out " + page.url());
|
||||
continue takeloop;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// evaluate information of metadata for navigation
|
||||
// author navigation:
|
||||
if ( pageauthor != null && pageauthor.length() > 0 ) {
|
||||
|
@ -654,6 +669,12 @@ public final class RWIProcess extends Thread
|
|||
continue;
|
||||
}
|
||||
|
||||
// check Scanner
|
||||
if ( !Scanner.acceptURL(page.url()) ) {
|
||||
this.sortout++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// namespace navigation
|
||||
String pagepath = page.url().getPath();
|
||||
if ( (p = pagepath.indexOf(':')) >= 0 ) {
|
||||
|
@ -675,10 +696,20 @@ public final class RWIProcess extends Thread
|
|||
this.filetypeNavigator.inc(fileext);
|
||||
}
|
||||
|
||||
// check Scanner
|
||||
if ( !Scanner.acceptURL(page.url()) ) {
|
||||
this.sortout++;
|
||||
continue;
|
||||
// vocabulary navigation
|
||||
tagharvest: for (String tag: taglist) {
|
||||
if (tag.length() < 1 || tag.charAt(0) != LibraryProvider.tagPrefix) continue tagharvest;
|
||||
try {
|
||||
Metatag metatag = LibraryProvider.autotagging.metatag(tag);
|
||||
ScoreMap<String> voc = this.vocabularyNavigator.get(metatag.getVocabularyName());
|
||||
if (voc == null) {
|
||||
voc = new ConcurrentScoreMap<String>();
|
||||
this.vocabularyNavigator.put(metatag.getVocabularyName(), voc);
|
||||
}
|
||||
voc.inc(metatag.getPrintName());
|
||||
} catch (RuntimeException e) {
|
||||
// tag may not be well-formed
|
||||
}
|
||||
}
|
||||
|
||||
// accept url
|
||||
|
@ -687,6 +718,8 @@ public final class RWIProcess extends Thread
|
|||
return null;
|
||||
}
|
||||
|
||||
final static Pattern SPACE_PATTERN = Pattern.compile(" ");
|
||||
|
||||
public int sizeQueue() {
|
||||
int c = this.stack.sizeQueue();
|
||||
for ( final WeakPriorityBlockingQueue<WordReferenceVars> s : this.doubleDomCache.values() ) {
|
||||
|
@ -818,6 +851,10 @@ public final class RWIProcess extends Thread
|
|||
return this.filetypeNavigator;
|
||||
}
|
||||
|
||||
public Map<String,ScoreMap<String>> getVocabularyNavigators() {
|
||||
return this.vocabularyNavigator;
|
||||
}
|
||||
|
||||
public static final Comparator<Map.Entry<String, Integer>> mecomp =
|
||||
new Comparator<Map.Entry<String, Integer>>() {
|
||||
@Override
|
||||
|
|
|
@ -472,6 +472,10 @@ public final class SearchEvent
|
|||
return this.rankingProcess.getFiletypeNavigator();
|
||||
}
|
||||
|
||||
public Map<String,ScoreMap<String>> getVocabularyNavigators() {
|
||||
return this.rankingProcess.getVocabularyNavigators();
|
||||
}
|
||||
|
||||
public void addHeuristic(final byte[] urlhash, final String heuristicName, final boolean redundant) {
|
||||
synchronized ( this.heuristics ) {
|
||||
this.heuristics.put(urlhash, new HeuristicResult(urlhash, heuristicName, redundant));
|
||||
|
|
|
@ -481,7 +481,7 @@ public class SnippetProcess {
|
|||
}
|
||||
|
||||
// get next entry
|
||||
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(100, this.timeout - System.currentTimeMillis()));
|
||||
page = SnippetProcess.this.rankingProcess.takeURL(true, Math.min(500, Math.max(100, this.timeout - System.currentTimeMillis())));
|
||||
//if (page != null) Log.logInfo("ResultFetcher", "got one page: " + page.metadata().url().toNormalform(true, false));
|
||||
//if (page == null) page = rankedCache.takeURL(false, this.timeout - System.currentTimeMillis());
|
||||
if (page == null) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user