mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
removed ContentControl servlet and functionality
This was not used at all (as far as I know) and was blocking a smooth integration of ivy in the context of an existing JSON parser.
This commit is contained in:
parent
b54f4ad35f
commit
fc98ca7a9c
123
.classpath
123
.classpath
|
@ -3,119 +3,6 @@
|
||||||
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/>
|
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/>
|
||||||
<classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
|
<classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
|
||||||
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
|
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
|
|
||||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
|
|
||||||
<attributes>
|
|
||||||
<attribute name="module" value="true"/>
|
|
||||||
</attributes>
|
|
||||||
</classpathentry>
|
|
||||||
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/avatica-core-1.13.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/calcite-core-1.18.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/calcite-linq4j-1.18.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jchardet-1.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/common-image-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-lang3-3.12.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/httpclient-4.5.12.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/httpcore-4.4.13.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/httpmime-4.5.12.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/icu4j-63.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/imageio-bmp-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/imageio-core-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/imageio-metadata-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/imageio-tiff-3.3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jackson-annotations-2.11.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jackson-core-2.11.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jackson-databind-2.11.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/oro-2.0.8.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jaudiotagger-2.2.5.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.25.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-client-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-continuation-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-deploy-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-http-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-io-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-jmx-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-proxy-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-security-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-server-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-servlet-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-servlets-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-util-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-webapp-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jetty-xml-9.4.35.v20201120.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jsch-0.1.54.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jsonic-1.3.10.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jwat-archive-common-1.1.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jwat-common-1.1.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jwat-gzip-1.1.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jwat-warc-1.1.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-analyzers-common-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-backward-codecs-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-classification-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-codecs-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-core-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-grouping-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-highlighter-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-join-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-memory-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-misc-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-queries-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-queryparser-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-spatial-extras-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/lucene-suggest-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/opentracing-api-0.33.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/opentracing-noop-0.33.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/opentracing-util-0.33.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/metadata-extractor-2.11.0.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/metrics-core-3.2.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/metrics-jmx-4.1.5.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/org.restlet.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/pdfbox-2.0.15.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/poi-3.17.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/poi-scratchpad-3.17.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/rrd4j-3.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/solr-core-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/solr-solrj-8.8.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/spatial4j-0.6.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/stax2-api-3.1.4.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/weupnp-0.1.4.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/woodstox-core-asl-4.4.1.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/xml-apis-1.4.01.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/xmpcore-5.1.3.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/xz-1.8.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/zookeeper-3.4.14.jar"/>
|
|
||||||
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/hazelcast-4.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/commons-compress-1.21.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/bcmail-jdk15on-1.69.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/bcpkix-jdk15on-1.69.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/bcprov-jdk15on-1.69.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/jsoup-1.14.2.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.32.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/slf4j-api-1.7.32.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.32.jar"/>
|
|
||||||
<classpathentry kind="lib" path="lib/langdetect-1.1-20120112.jar"/>
|
|
||||||
<classpathentry kind="src" path="htroot/api/blacklists"/>
|
<classpathentry kind="src" path="htroot/api/blacklists"/>
|
||||||
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
|
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
|
||||||
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
|
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
|
||||||
|
@ -126,6 +13,16 @@
|
||||||
<classpathentry kind="src" path="htroot/yacy"/>
|
<classpathentry kind="src" path="htroot/yacy"/>
|
||||||
<classpathentry kind="src" path="source"/>
|
<classpathentry kind="src" path="source"/>
|
||||||
<classpathentry kind="src" path="test/java"/>
|
<classpathentry kind="src" path="test/java"/>
|
||||||
|
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
|
||||||
|
<attributes>
|
||||||
|
<attribute name="module" value="true"/>
|
||||||
|
</attributes>
|
||||||
|
</classpathentry>
|
||||||
|
<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=yacy&ivyXmlPath=ivy.xml&confs=compile">
|
||||||
|
<attributes>
|
||||||
|
<attribute name="module" value="true"/>
|
||||||
|
</attributes>
|
||||||
|
</classpathentry>
|
||||||
<classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/>
|
<classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/>
|
||||||
<classpathentry kind="output" path="gen"/>
|
<classpathentry kind="output" path="gen"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
1
.project
1
.project
|
@ -23,5 +23,6 @@
|
||||||
</buildSpec>
|
</buildSpec>
|
||||||
<natures>
|
<natures>
|
||||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||||
|
<nature>org.apache.ivyde.eclipse.ivynature</nature>
|
||||||
</natures>
|
</natures>
|
||||||
</projectDescription>
|
</projectDescription>
|
||||||
|
|
|
@ -1370,16 +1370,6 @@ core.service.webgraph.tmp = false
|
||||||
parserAugmentation = false
|
parserAugmentation = false
|
||||||
parserAugmentation.RDFa = false
|
parserAugmentation.RDFa = false
|
||||||
|
|
||||||
# Content control settings
|
|
||||||
contentcontrol.enabled = false
|
|
||||||
contentcontrol.bookmarklist = contentcontrol
|
|
||||||
contentcontrol.mandatoryfilterlist = yacy
|
|
||||||
contentcontrol.smwimport.enabled = false
|
|
||||||
contentcontrol.smwimport.baseurl =
|
|
||||||
contentcontrol.smwimport.purgelistoninit = true
|
|
||||||
contentcontrol.smwimport.targetlist = contentcontrol
|
|
||||||
contentcontrol.smwimport.defaultcategory = yacy
|
|
||||||
|
|
||||||
# host browser settings
|
# host browser settings
|
||||||
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
|
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
|
||||||
browser.autoload = false
|
browser.autoload = false
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
<head>
|
|
||||||
<title>YaCy '#[clientname]#': Content Control</title>
|
|
||||||
#%env/templates/metas.template%#
|
|
||||||
</head>
|
|
||||||
<body id="Settings">
|
|
||||||
#%env/templates/header.template%#
|
|
||||||
#%env/templates/submenuBlacklist.template%#
|
|
||||||
|
|
||||||
<h2>Content Control</h2>
|
|
||||||
|
|
||||||
<form id="contentcontrolsettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
|
|
||||||
|
|
||||||
<fieldset><legend id="augmentation">Peer Content Control URL Filter</legend>
|
|
||||||
<p>
|
|
||||||
With this settings you can activate or deactivate content control on this peer.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
|
|
||||||
<dl>
|
|
||||||
|
|
||||||
<dt><label for="content">Use content control filtering:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="checkbox" name="contentcontrolenabled" id="contentcontrolenabled" #(contentcontrolenabled_checked)#:: checked="checked"#(/contentcontrolenabled_checked)# />Enabled<br/>
|
|
||||||
<p class="help">
|
|
||||||
Enables or disables content control.
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
|
|
||||||
|
|
||||||
<dt><label for="content">Use this table to create filter:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
|
|
||||||
<p class="help">
|
|
||||||
Define a table. Default: contentcontrol
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
<dt></dt>
|
|
||||||
<dd><input type="submit" name="contentcontrolSettings" value="Submit" class="btn btn-primary"/></dd>
|
|
||||||
</dl>
|
|
||||||
</fieldset>
|
|
||||||
</form>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
|
|
||||||
<fieldset><legend id="urlproxy">Content Control SMW Import Settings</legend>
|
|
||||||
<p>
|
|
||||||
With this settings you can define the content control import settings. You can define a <a href="http://wiki.sciety.org/mediawiki/extensions/yacy-smwextension/" target="_blank">Semantic Media Wiki with the appropriate extensions.</a>
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<dl>
|
|
||||||
|
|
||||||
<dt><label for="content">SMW import to content control list:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
|
|
||||||
<p class="help">
|
|
||||||
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
|
|
||||||
<dt><label for="content">SMW import base URL:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="text" name="ccsmwimporturl" value="#[ccsmwimporturl]#" size="60" /><br/><br/>
|
|
||||||
<p class="help">
|
|
||||||
Define base URL for SMW special page "Ask". Example: http://my.wiki.cc/wiki/Special:Ask
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
|
|
||||||
<dt><label for="content">SMW import target table:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
|
|
||||||
<p class="help">
|
|
||||||
Define import target table. Default: contentcontrol
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
|
|
||||||
<dt><label for="content">Purge content control list on initial sync:</label></dt>
|
|
||||||
<dd>
|
|
||||||
<input type="checkbox" name="ccsmwpurge" id="ccsmwpurge" #(ccsmwpurge_checked)#:: checked="checked"#(/ccsmwpurge_checked)# />Enabled<br/>
|
|
||||||
<p class="help">
|
|
||||||
Purge content control list on initial synchronisation after startup.
|
|
||||||
</p>
|
|
||||||
</dd>
|
|
||||||
<dt></dt>
|
|
||||||
<dd><input type="submit" name="contentcontrolExtraSettings" value="Submit" class="btn btn-primary"/></dd>
|
|
||||||
</dl>
|
|
||||||
</fieldset>
|
|
||||||
</form>
|
|
||||||
|
|
||||||
|
|
||||||
#%env/templates/footer.template%#
|
|
||||||
</body>
|
|
||||||
</html>
|
|
|
@ -1,68 +0,0 @@
|
||||||
import net.yacy.cora.protocol.RequestHeader;
|
|
||||||
import net.yacy.server.serverObjects;
|
|
||||||
import net.yacy.server.serverSwitch;
|
|
||||||
|
|
||||||
public final class ContentControl_p {
|
|
||||||
|
|
||||||
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
|
|
||||||
final serverObjects post, final serverSwitch env) {
|
|
||||||
|
|
||||||
final serverObjects prop = new serverObjects();
|
|
||||||
|
|
||||||
if (post != null) {
|
|
||||||
|
|
||||||
if (post.containsKey("contentcontrolExtraSettings")) {
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.smwimport.baseurl",
|
|
||||||
post.get("ccsmwimporturl"));
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.smwimport.enabled",
|
|
||||||
"on".equals(post.get("ccsmwimport")) ? true : false);
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.smwimport.purgelistoninit",
|
|
||||||
"on".equals(post.get("ccsmwpurge")) ? true : false);
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.smwimport.targetlist",
|
|
||||||
post.get("ccsmwimportlist"));
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (post.containsKey("contentcontrolSettings")) {
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.enabled",
|
|
||||||
"on".equals(post.get("contentcontrolenabled")) ? true : false);
|
|
||||||
|
|
||||||
|
|
||||||
env.setConfig("contentcontrol.bookmarklist",
|
|
||||||
post.get("contentcontrolbml"));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
prop.putHTML("ccsmwimportlist",
|
|
||||||
env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
|
|
||||||
|
|
||||||
prop.put("ccsmwpurge_checked", env.getConfigBool(
|
|
||||||
"contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
|
|
||||||
|
|
||||||
prop.putHTML("ccsmwimporturl",
|
|
||||||
env.getConfig("contentcontrol.smwimport.baseurl", ""));
|
|
||||||
|
|
||||||
prop.put("ccsmwimport_checked", env.getConfigBool(
|
|
||||||
"contentcontrol.smwimport.enabled", false) ? "1" : "0");
|
|
||||||
|
|
||||||
|
|
||||||
prop.put("contentcontrolenabled_checked",
|
|
||||||
env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
|
|
||||||
|
|
||||||
prop.putHTML("contentcontrolbml",
|
|
||||||
env.getConfig("contentcontrol.bookmarklist", ""));
|
|
||||||
|
|
||||||
// return rewrite properties
|
|
||||||
return prop;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -5,6 +5,5 @@
|
||||||
<li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li>
|
<li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li>
|
||||||
<li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li>
|
<li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li>
|
||||||
<li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li>
|
<li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li>
|
||||||
<li><a href="ContentControl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Content Control</a></li>
|
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
2
ivy.xml
2
ivy.xml
|
@ -13,7 +13,6 @@
|
||||||
<dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/>
|
<dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/>
|
||||||
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" />
|
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" />
|
||||||
<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/>
|
<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/>
|
||||||
<dependency org="com.googlecode.json-simple" name="json-simple" rev="1.1.1" conf="compile->master" />
|
|
||||||
<dependency org="com.google.guava" name="guava" rev="25.1-jre" conf="compile->master"/>
|
<dependency org="com.google.guava" name="guava" rev="25.1-jre" conf="compile->master"/>
|
||||||
<dependency org="com.hazelcast" name="hazelcast" rev="4.2" />
|
<dependency org="com.hazelcast" name="hazelcast" rev="4.2" />
|
||||||
<dependency org="com.ibm.icu" name="icu4j" rev="63.1"/>
|
<dependency org="com.ibm.icu" name="icu4j" rev="63.1"/>
|
||||||
|
@ -94,7 +93,6 @@
|
||||||
<!-- This does not match langdetect.jar from pre-ivy -->
|
<!-- This does not match langdetect.jar from pre-ivy -->
|
||||||
<dependency org="org.tukaani" name="xz" rev="1.8"/>
|
<dependency org="org.tukaani" name="xz" rev="1.8"/>
|
||||||
<dependency org="oro" name="oro" rev="2.0.8"/>
|
<dependency org="oro" name="oro" rev="2.0.8"/>
|
||||||
<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/>
|
|
||||||
|
|
||||||
<dependency org="junit" name="junit" rev="4.13" conf="test->default"/>
|
<dependency org="junit" name="junit" rev="4.13" conf="test->default"/>
|
||||||
<dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/>
|
<dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/>
|
||||||
|
|
|
@ -953,30 +953,6 @@ Duration==Dauer
|
||||||
#ID==ID
|
#ID==ID
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==Inhaltskontrolle<
|
|
||||||
Peer Content Control URL Filter==Peer Inhaltskontrolle URL Filter
|
|
||||||
With this settings you can activate or deactivate content control on this peer.==Mit dieser Einstellung kann die Inhaltskontrolle auf diesem Peer an- oder abgeschalten werden.
|
|
||||||
Use content control filtering:==Verwende Inhaltskontrollfilter:
|
|
||||||
>Enabled<==>Aktiviert<
|
|
||||||
Enables or disables content control.==Schaltet Inhaltskontrolle an- oder ab.
|
|
||||||
Use this table to create filter:==Verwenden Sie diese Tabelle, um Filter zu erzeugen:
|
|
||||||
Define a table. Default:==Definieren Sie ein Tabelle. Standardeinstellung:
|
|
||||||
Content Control SMW Import Settings==Inhaltskontrolle SMW Importeinstellungen
|
|
||||||
With this settings you can define the content control import settings. You can define a==Mit diesen Einstellungen können Sie die Importeinstellungen für die Inhaltskontrolle definieren. Definieren Sie ein
|
|
||||||
Semantic Media Wiki with the appropriate extensions.==Semantisches Media Wiki mit den passenden Erweiterungen.
|
|
||||||
SMW import to content control list:==SMW Import für die Inhalts-Kontroll-Liste:
|
|
||||||
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Konstante Synchronisation der Inhalts-Kontroll-Liste vom SMW (Semantisches Medienwiki) im Hintergrund. Benötigt Neustart!
|
|
||||||
SMW import base URL:==SMW Import Basis URL:
|
|
||||||
Define base URL for SMW special page "Ask". Example: ==Definiere Basis URL für SMW Spezialseite "Ask". Beispiel:
|
|
||||||
SMW import target table:==SMW Import Ziele Tabelle:
|
|
||||||
Define import target table. Default: contentcontrol==Definieren Import Ziel Tabelle. Standardeinstellung: contentcontrol
|
|
||||||
Purge content control list on initial sync:==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation:
|
|
||||||
Purge content control list on initial synchronisation after startup.==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation nach dem Start.
|
|
||||||
"Submit"=="Absenden"
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#File: CookieMonitorIncoming_p.html
|
#File: CookieMonitorIncoming_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
|
|
|
@ -530,13 +530,6 @@ Duration==Duración
|
||||||
ID==ID
|
ID==ID
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==Control de contenido<
|
|
||||||
>Enabled<==>Habilitado
|
|
||||||
"Submit"=="Enviar"
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#File: CookieMonitorIncoming_p.html
|
#File: CookieMonitorIncoming_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
|
|
|
@ -510,13 +510,6 @@ Duration==Durata
|
||||||
ID==ID
|
ID==ID
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==Controllo dei contenuti<
|
|
||||||
>Enabled<==>Abilitato
|
|
||||||
"Submit"=="Invia"
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#File: CookieMonitorIncoming_p.html
|
#File: CookieMonitorIncoming_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
|
|
|
@ -714,13 +714,6 @@ Last Deploy==最後の展開
|
||||||
Connection Tracking==接続の追跡
|
Connection Tracking==接続の追跡
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==コンテントの制御<
|
|
||||||
"Submit"=="確定する"
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#File: CookieMonitorIncoming_p.html
|
#File: CookieMonitorIncoming_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
Incoming Cookies Monitor==着信したCookieのモニター
|
Incoming Cookies Monitor==着信したCookieのモニター
|
||||||
|
|
|
@ -2429,71 +2429,6 @@
|
||||||
</body>
|
</body>
|
||||||
</file>
|
</file>
|
||||||
|
|
||||||
<file original="ContentControl_p.html" source-language="en" datatype="html">
|
|
||||||
<body>
|
|
||||||
<trans-unit id="3f3b9286" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Content Control<</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="d21676d1" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Peer Content Control URL Filter</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="542e1ecb" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>With this settings you can activate or deactivate content control on this peer.</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="2bd01413" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Use content control filtering:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="4e4f2379" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>>Enabled<</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="ff54fe20" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Enables or disables content control.</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="81cdc1a8" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Use this table to create filter:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="2a641f75" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Define a table. Default:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="c3a262b1" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Content Control SMW Import Settings</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="fe0fc485" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>With this settings you can define the content control import settings. You can define a</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="a00319d4" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Semantic Media Wiki with the appropriate extensions.</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="3f00f0c5" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>SMW import to content control list:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="446815ef" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="d9bff282" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>SMW import base URL:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="ecfbe3e8" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Define base URL for SMW special page "Ask". Example: </source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="d0d7e963" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>SMW import target table:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="84acd3e4" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Define import target table. Default: contentcontrol</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="70ed825" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Purge content control list on initial sync:</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="642de9e8" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>Purge content control list on initial synchronisation after startup.</source>
|
|
||||||
</trans-unit>
|
|
||||||
<trans-unit id="bfcc5088" xml:space="preserve" approved="no" translate="yes">
|
|
||||||
<source>"Submit"</source>
|
|
||||||
</trans-unit>
|
|
||||||
</body>
|
|
||||||
</file>
|
|
||||||
|
|
||||||
<file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html">
|
<file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html">
|
||||||
<body>
|
<body>
|
||||||
<trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes">
|
<trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes">
|
||||||
|
|
|
@ -1059,30 +1059,6 @@ Duration==Длительность
|
||||||
#ID==ID
|
#ID==ID
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==Управление контентом<
|
|
||||||
Peer Content Control URL Filter==Управление контентом узла
|
|
||||||
With this settings you can activate or deactivate content control on this peer.==Эти настройки позволяют включить или отключить управление контентом для вашего узла.
|
|
||||||
Use content control filtering:==Использовать фильтр управления контентом:
|
|
||||||
>Enabled<==>Включить<
|
|
||||||
Enables or disables content control.==Включение или отключение управления контентом.
|
|
||||||
Use this table to create filter:==Использовать это поле для создания фильтра:
|
|
||||||
Define a table. Default:==Задать значение поля. По-умолчанию:
|
|
||||||
Content Control SMW Import Settings==Импорт настроек управления контентом SMW
|
|
||||||
With this settings you can define the content control import settings. You can define a==Эти настройки позволяют задать параметры импорта настроек управления контентом
|
|
||||||
Semantic Media Wiki with the appropriate extensions.==Semantic Media Wiki с соответствующими расширениями.
|
|
||||||
SMW import to content control list:== Импорт SMW в список управления контентом:
|
|
||||||
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Включение или отключение постоянной фоновой синхронизации списка управления контентом из SMW (Semantic Mediawiki). Потребуется перезапуск программы!
|
|
||||||
SMW import base URL:==Ссылка на импортируемую базу SMW:
|
|
||||||
Define base URL for SMW special page "Ask". Example: ==Укажите ссылку на базу SMW на специальной странице "Ask". Например:
|
|
||||||
SMW import target table:==Поле назначения импорта SMW:
|
|
||||||
Define import target table. Default: contentcontrol==Укажите поле назначения импорта. По-умолчанию: contentcontrol
|
|
||||||
Purge content control list on initial sync:==Удалить список управления контентом в начале синхронизации:
|
|
||||||
Purge content control list on initial synchronisation after startup.==Удалить список управления контентом в начале синхронизации после запуска программы.
|
|
||||||
"Submit"=="Сохранить"
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#File: CookieMonitorIncoming_p.html
|
#File: CookieMonitorIncoming_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
|
|
|
@ -1033,31 +1033,6 @@ For minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLe
|
||||||
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少
|
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少
|
||||||
#-----------------------------
|
#-----------------------------
|
||||||
|
|
||||||
#File: ContentControl_p.html
|
|
||||||
#---------------------------
|
|
||||||
Content Control<==内容控制<
|
|
||||||
Peer Content Control URL Filter==节点内容控制地址过滤器
|
|
||||||
With this settings you can activate or deactivate content control on this peer==使用此设置,你可以激活或取消激活此YaCy节点上的内容控制
|
|
||||||
Use content control filtering:==使用内容控制过滤:
|
|
||||||
>Enabled<==>已启用<
|
|
||||||
Enables or disables content control==启用或禁用内容控制
|
|
||||||
Use this table to create filter:==使用此表创建过滤器:
|
|
||||||
Define a table. Default:==定义一个表格. 默认:
|
|
||||||
Content Control SMW Import Settings==内容控制SMW导入设置
|
|
||||||
With this settings you can define the content control import settings. You can define a==使用此设置,你可以定义内容控制导入设置. 你可以定义一个
|
|
||||||
Semantic Media Wiki with the appropriate extensions==语义媒体百科与适当的扩展
|
|
||||||
SMW import to content control list:==SMW导入到内容控制列表:
|
|
||||||
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==启用或禁用来自SMW(Semantic Mediawiki)的内容控制列表的恒定后台同步。 需要重启!
|
|
||||||
SMW import base URL:==SMW导入基URL:
|
|
||||||
Define base URL for SMW special page "Ask". Example: ==为SMW特殊页面“Ask”定义基础地址.例:
|
|
||||||
SMW import target table:==SMW导入目标表:
|
|
||||||
Define import target table. Default: contentcontrol==定义导入目标表. 默认值:contentcontrol
|
|
||||||
Purge content control list on initial sync:==在初始同步时清除内容控制列表:
|
|
||||||
Purge content control list on initial synchronisation after startup.==重启后,清除初始同步的内容控制列表.
|
|
||||||
"Submit"=="提交"
|
|
||||||
Define base URL for SMW special page "Ask". Example:==为SMW特殊页面“Ask”定义基础地址.例:
|
|
||||||
#-----------------------------
|
|
||||||
|
|
||||||
#File: ContentIntegrationPHPBB3_p.html
|
#File: ContentIntegrationPHPBB3_p.html
|
||||||
#---------------------------
|
#---------------------------
|
||||||
Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入
|
Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入
|
||||||
|
|
|
@ -1,90 +0,0 @@
|
||||||
package net.yacy.contentcontrol;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
import net.yacy.kelondro.blob.Tables;
|
|
||||||
import net.yacy.kelondro.blob.Tables.Row;
|
|
||||||
import net.yacy.repository.FilterEngine;
|
|
||||||
import net.yacy.search.Switchboard;
|
|
||||||
|
|
||||||
public class ContentControlFilterUpdateThread implements Runnable {
|
|
||||||
|
|
||||||
private final Switchboard sb;
|
|
||||||
|
|
||||||
private Boolean locked = false;
|
|
||||||
|
|
||||||
private static FilterEngine networkfilter;
|
|
||||||
|
|
||||||
public ContentControlFilterUpdateThread(final Switchboard sb) {
|
|
||||||
|
|
||||||
this.sb = sb;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void run() {
|
|
||||||
|
|
||||||
if (!this.locked) {
|
|
||||||
|
|
||||||
this.locked = true;
|
|
||||||
|
|
||||||
if (this.sb.getConfigBool("contentcontrol.enabled", false) == true) {
|
|
||||||
|
|
||||||
if (SMWListSyncThread.dirty) {
|
|
||||||
|
|
||||||
networkfilter = updateFilter();
|
|
||||||
|
|
||||||
SMWListSyncThread.dirty = false;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
this.locked = false;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static FilterEngine updateFilter () {
|
|
||||||
|
|
||||||
FilterEngine newfilter = new FilterEngine();
|
|
||||||
|
|
||||||
Switchboard sb = Switchboard.getSwitchboard();
|
|
||||||
|
|
||||||
Iterator<Tables.Row> it;
|
|
||||||
try {
|
|
||||||
it = sb.tables.iterator(sb.getConfig("contentcontrol.bookmarklist",
|
|
||||||
"contentcontrol"));
|
|
||||||
|
|
||||||
while (it.hasNext()) {
|
|
||||||
Row b = it.next();
|
|
||||||
|
|
||||||
if (!b.get("filter", "").equals("")) {
|
|
||||||
|
|
||||||
newfilter.add(b.get("filter", ""), null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (final IOException e) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
return newfilter;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static FilterEngine getNetworkFilter() {
|
|
||||||
FilterEngine f = networkfilter;
|
|
||||||
|
|
||||||
if (f != null && f.size() > 0)
|
|
||||||
return f;
|
|
||||||
|
|
||||||
return null;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,163 +0,0 @@
|
||||||
package net.yacy.contentcontrol;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
|
||||||
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
|
||||||
|
|
||||||
import org.json.simple.parser.ContentHandler;
|
|
||||||
import org.json.simple.parser.JSONParser;
|
|
||||||
import org.json.simple.parser.ParseException;
|
|
||||||
|
|
||||||
public class SMWListImporter implements Runnable, ContentHandler{
|
|
||||||
|
|
||||||
// Importer Variables
|
|
||||||
private final ArrayBlockingQueue<SMWListRow> listEntries;
|
|
||||||
private final Reader importFile;
|
|
||||||
|
|
||||||
private SMWListRow row;
|
|
||||||
private final JSONParser parser;
|
|
||||||
|
|
||||||
// Parser Variables
|
|
||||||
private final StringBuilder value;
|
|
||||||
private final StringBuilder key;
|
|
||||||
private final HashMap<String,String> obj;
|
|
||||||
|
|
||||||
private Boolean isElement;
|
|
||||||
|
|
||||||
public SMWListImporter(final Reader importFile, final int queueSize) {
|
|
||||||
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
|
|
||||||
this.importFile = importFile;
|
|
||||||
|
|
||||||
this.row = new SMWListRow();
|
|
||||||
|
|
||||||
this.parser = new JSONParser();
|
|
||||||
|
|
||||||
this.value = new StringBuilder(128);
|
|
||||||
this.key = new StringBuilder(16);
|
|
||||||
this.obj = new HashMap<String,String>();
|
|
||||||
|
|
||||||
this.isElement = false;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void startJSON() throws ParseException, IOException {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void endJSON() throws ParseException, IOException {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean startArray() throws ParseException, IOException {
|
|
||||||
final String key = this.key.toString();
|
|
||||||
|
|
||||||
if (key.equals("items")) {
|
|
||||||
|
|
||||||
this.isElement = true;
|
|
||||||
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean endArray() throws ParseException, IOException {
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean startObject() throws ParseException, IOException {
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean endObject() throws ParseException, IOException {
|
|
||||||
|
|
||||||
if(this.isElement) {
|
|
||||||
|
|
||||||
for (Entry<String, String> e: this.obj.entrySet()) {
|
|
||||||
this.row.add (e.getKey(), e.getValue());
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
this.listEntries.put(this.row);
|
|
||||||
//this.count++;
|
|
||||||
} catch (final InterruptedException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
}
|
|
||||||
this.obj.clear();
|
|
||||||
this.row = new SMWListRow();
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean startObjectEntry(String key) throws ParseException, IOException {
|
|
||||||
this.key.setLength(0);
|
|
||||||
this.key.append(key);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean primitive(Object value) throws ParseException, IOException {
|
|
||||||
|
|
||||||
this.value.setLength(0);
|
|
||||||
if(value instanceof java.lang.String) {
|
|
||||||
this.value.append((String)value);
|
|
||||||
} else if(value instanceof java.lang.Boolean) {
|
|
||||||
this.value.append(value);
|
|
||||||
} else if(value instanceof java.lang.Number) {
|
|
||||||
this.value.append(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean endObjectEntry() throws ParseException, IOException {
|
|
||||||
|
|
||||||
final String key = this.key.toString();
|
|
||||||
final String value = this.value.toString();
|
|
||||||
|
|
||||||
this.obj.put(key, value);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
|
|
||||||
this.parser.parse(this.importFile, this, true);
|
|
||||||
|
|
||||||
} catch (final IOException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} catch (final ParseException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} finally {
|
|
||||||
|
|
||||||
try {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
|
|
||||||
this.listEntries.put(SMWListRow.POISON);
|
|
||||||
} catch (final InterruptedException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public SMWListRow take() {
|
|
||||||
try {
|
|
||||||
return this.listEntries.take();
|
|
||||||
} catch (final InterruptedException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,117 +0,0 @@
|
||||||
package net.yacy.contentcontrol;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
|
||||||
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
|
||||||
|
|
||||||
import org.json.simple.JSONArray;
|
|
||||||
import org.json.simple.JSONObject;
|
|
||||||
import org.json.simple.parser.JSONParser;
|
|
||||||
import org.json.simple.parser.ParseException;
|
|
||||||
|
|
||||||
public class SMWListImporterFormatObsolete implements Runnable{
|
|
||||||
|
|
||||||
private final ArrayBlockingQueue<SMWListRow> listEntries;
|
|
||||||
private final Reader importFile;
|
|
||||||
private final JSONParser parser;
|
|
||||||
|
|
||||||
public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) {
|
|
||||||
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
|
|
||||||
this.importFile = importFile;
|
|
||||||
this.parser = new JSONParser();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
|
|
||||||
Object obj = this.parser.parse(this.importFile);
|
|
||||||
|
|
||||||
JSONObject jsonObject = (JSONObject) obj;
|
|
||||||
|
|
||||||
JSONArray items = (JSONArray) jsonObject.get("items");
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
Iterator<JSONObject> iterator = items.iterator();
|
|
||||||
while (iterator.hasNext()) {
|
|
||||||
this.parseItem (iterator.next());
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (final IOException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} catch (final ParseException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} finally {
|
|
||||||
|
|
||||||
try {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
|
|
||||||
this.listEntries.put(SMWListRow.POISON);
|
|
||||||
} catch (final InterruptedException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void parseItem(JSONObject jsonObject) {
|
|
||||||
|
|
||||||
try {
|
|
||||||
SMWListRow row = new SMWListRow();
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
Iterator<String> iterator = jsonObject.keySet().iterator();
|
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
|
||||||
String entryKey = iterator.next();
|
|
||||||
|
|
||||||
Object value = jsonObject.get (entryKey);
|
|
||||||
String valueKey = "";
|
|
||||||
|
|
||||||
if (value instanceof java.lang.String) {
|
|
||||||
valueKey = value.toString();
|
|
||||||
} else if (value instanceof JSONArray) {
|
|
||||||
valueKey = jsonListAll ((JSONArray) value);
|
|
||||||
}
|
|
||||||
|
|
||||||
row.add (entryKey, valueKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
this.listEntries.put(row);
|
|
||||||
|
|
||||||
} catch (final Exception e) {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC", "import of entry failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private String jsonListAll(JSONArray value) {
|
|
||||||
String res = "";
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
Iterator<Object> iterator = value.listIterator();
|
|
||||||
while (iterator.hasNext()) {
|
|
||||||
Object val = iterator.next();
|
|
||||||
res += val.toString()+",";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (res.endsWith (",")) {
|
|
||||||
res = res.substring (0, res.length()-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public SMWListRow take() {
|
|
||||||
try {
|
|
||||||
return this.listEntries.take();
|
|
||||||
} catch (final InterruptedException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,24 +0,0 @@
|
||||||
package net.yacy.contentcontrol;
|
|
||||||
|
|
||||||
import net.yacy.kelondro.blob.Tables;
|
|
||||||
|
|
||||||
public class SMWListRow {
|
|
||||||
|
|
||||||
private Tables.Data data;
|
|
||||||
|
|
||||||
public static final SMWListRow POISON = new SMWListRow();
|
|
||||||
public static final SMWListRow EMPTY = new SMWListRow();
|
|
||||||
|
|
||||||
public SMWListRow() {
|
|
||||||
this.data = new Tables.Data();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void add (String key, String value) {
|
|
||||||
this.data.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Tables.Data getData() {
|
|
||||||
return this.data;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,201 +0,0 @@
|
||||||
package net.yacy.contentcontrol;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
|
|
||||||
import net.yacy.cora.document.encoding.UTF8;
|
|
||||||
import net.yacy.cora.protocol.ClientIdentification;
|
|
||||||
import net.yacy.cora.protocol.http.HTTPClient;
|
|
||||||
import net.yacy.cora.util.CommonPattern;
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
|
||||||
import net.yacy.search.Switchboard;
|
|
||||||
|
|
||||||
public class SMWListSyncThread implements Runnable {
|
|
||||||
|
|
||||||
private final Switchboard sb;
|
|
||||||
private Boolean locked = false;
|
|
||||||
private String lastsync = "1900-01-01T01:00:00";
|
|
||||||
private String currenttimestamp = "1900-01-01T01:00:00";
|
|
||||||
private long offset = 0;
|
|
||||||
private final long limit = 500;
|
|
||||||
private long currentmax = 0;
|
|
||||||
private boolean runningjob = false;
|
|
||||||
|
|
||||||
private String targetList;
|
|
||||||
private String parameters;
|
|
||||||
private String query;
|
|
||||||
|
|
||||||
public static Boolean dirty = false;
|
|
||||||
|
|
||||||
public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) {
|
|
||||||
this.sb = sb;
|
|
||||||
this.targetList = targetList;
|
|
||||||
this.parameters = parameters;
|
|
||||||
this.query = query;
|
|
||||||
if (purgeOnInit) {
|
|
||||||
this.sb.tables.clear(targetList);
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final String wikiurlify (String s) {
|
|
||||||
String ret = s;
|
|
||||||
ret = ret.replace("-", "-2D");
|
|
||||||
ret = ret.replace("+", "-2B");
|
|
||||||
ret = ret.replace(" ", "-20");
|
|
||||||
ret = ret.replace("[", "-5B");
|
|
||||||
ret = ret.replace("]", "-5D");
|
|
||||||
ret = ret.replace(":", "-3A");
|
|
||||||
ret = ret.replace(">", "-3E");
|
|
||||||
ret = ret.replace("?", "-3F");
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final void run() {
|
|
||||||
|
|
||||||
if (!this.locked) {
|
|
||||||
this.locked = true;
|
|
||||||
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
|
|
||||||
|
|
||||||
if (!this.runningjob) {
|
|
||||||
|
|
||||||
// we have to count all new elements first
|
|
||||||
try {
|
|
||||||
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
|
|
||||||
URL urlCount;
|
|
||||||
|
|
||||||
urlCount = new URL(
|
|
||||||
this.sb.getConfig(
|
|
||||||
"contentcontrol.smwimport.baseurl",
|
|
||||||
"")
|
|
||||||
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]")
|
|
||||||
|
|
||||||
+ wikiurlify (this.parameters)
|
|
||||||
|
|
||||||
+ "/mainlabel%3D"
|
|
||||||
+ "/offset%3D0"
|
|
||||||
+ "/limit%3D200000"
|
|
||||||
+ "/format%3Dystat");
|
|
||||||
|
|
||||||
String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false));
|
|
||||||
String overallcount = CommonPattern.COMMA.split(reply)[0];
|
|
||||||
String lastsyncstring = CommonPattern.COMMA.split(reply)[1];
|
|
||||||
this.currentmax = Integer.parseInt(overallcount);
|
|
||||||
|
|
||||||
if (this.currentmax > 0) {
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC",
|
|
||||||
"import job counts "
|
|
||||||
+ this.currentmax
|
|
||||||
+ " new elements between "
|
|
||||||
+ this.lastsync + " and "
|
|
||||||
+ this.currenttimestamp);
|
|
||||||
|
|
||||||
this.currenttimestamp = this.lastsync;
|
|
||||||
|
|
||||||
this.runningjob = true;
|
|
||||||
this.lastsync = lastsyncstring;
|
|
||||||
this.offset = 0;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ConcurrentLog.warn("SMWLISTSYNC",
|
|
||||||
"No SMWimport URL defined");
|
|
||||||
}
|
|
||||||
} catch (final MalformedURLException e) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e.printStackTrace();
|
|
||||||
} catch (final IOException e) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
// there are new elements to be imported
|
|
||||||
ConcurrentLog.info("SMWLISTSYNC",
|
|
||||||
"importing max. " + this.limit
|
|
||||||
+ " elements at " + this.offset + " of "
|
|
||||||
+ this.currentmax + ", since "
|
|
||||||
+ this.currenttimestamp);
|
|
||||||
URL urlImport;
|
|
||||||
try {
|
|
||||||
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
|
|
||||||
urlImport = new URL(
|
|
||||||
this.sb.getConfig(
|
|
||||||
"contentcontrol.smwimport.baseurl",
|
|
||||||
"")
|
|
||||||
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]")
|
|
||||||
|
|
||||||
+ wikiurlify (this.parameters)
|
|
||||||
|
|
||||||
+ "/mainlabel%3D"
|
|
||||||
+ "/syntax%3Dobsolete"
|
|
||||||
+ "/offset%3D" + this.offset
|
|
||||||
+ "/limit%3D" + this.limit
|
|
||||||
+ "/format%3Djson");
|
|
||||||
|
|
||||||
this.offset += this.limit;
|
|
||||||
if (this.offset > this.currentmax) {
|
|
||||||
this.runningjob = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
InputStreamReader reader = null;
|
|
||||||
try {
|
|
||||||
reader = new InputStreamReader(
|
|
||||||
urlImport.openStream(), StandardCharsets.UTF_8);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
this.runningjob = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (reader != null) {
|
|
||||||
SMWListImporterFormatObsolete smwListImporter = null;
|
|
||||||
try {
|
|
||||||
smwListImporter = new SMWListImporterFormatObsolete(
|
|
||||||
reader, 200);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
// TODO: display an error message
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
this.runningjob = false;
|
|
||||||
}
|
|
||||||
Thread t;
|
|
||||||
SMWListRow row;
|
|
||||||
t = new Thread(smwListImporter,"SMW List Importer");
|
|
||||||
t.start();
|
|
||||||
while ((row = smwListImporter.take()) != SMWListRow.POISON) {
|
|
||||||
if (row == SMWListRow.EMPTY) {
|
|
||||||
this.runningjob = false;
|
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
this.sb.tables.insert(targetList, row.getData());
|
|
||||||
|
|
||||||
dirty = true;
|
|
||||||
|
|
||||||
} catch (final Exception e) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} catch (final MalformedURLException e2) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e2.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
this.locked = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -26,31 +26,31 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.solr.client.solrj.SolrClient;
|
||||||
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||||
|
import org.apache.solr.client.solrj.impl.XMLResponseParser;
|
||||||
|
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||||
|
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||||
|
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||||
|
import org.apache.solr.client.solrj.response.LukeResponse;
|
||||||
|
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
|
||||||
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
|
||||||
import net.yacy.cora.federate.solr.instance.ServerShard;
|
import net.yacy.cora.federate.solr.instance.ServerShard;
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
import net.yacy.search.schema.CollectionSchema;
|
import net.yacy.search.schema.CollectionSchema;
|
||||||
|
|
||||||
import org.apache.solr.common.SolrDocumentList;
|
|
||||||
import org.apache.solr.common.SolrException;
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
|
||||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
|
||||||
import org.apache.solr.common.util.NamedList;
|
|
||||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
|
||||||
import org.apache.solr.client.solrj.impl.XMLResponseParser;
|
|
||||||
import org.apache.solr.client.solrj.SolrClient;
|
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
|
||||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
|
||||||
import org.apache.solr.client.solrj.request.LukeRequest;
|
|
||||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
|
||||||
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
|
|
||||||
import org.apache.solr.client.solrj.response.LukeResponse;
|
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
|
||||||
import org.apache.solr.common.params.CommonParams;
|
|
||||||
|
|
||||||
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
|
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
|
||||||
|
|
||||||
protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName());
|
protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName());
|
||||||
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<Byte>(0, true);
|
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<>(0, true);
|
||||||
// pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks
|
// pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks
|
||||||
// this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks
|
// this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks
|
||||||
static {
|
static {
|
||||||
|
@ -158,8 +158,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
@Override
|
@Override
|
||||||
public void deleteByIds(final Collection<String> ids) throws IOException {
|
public void deleteByIds(final Collection<String> ids) throws IOException {
|
||||||
if (this.server == null) return;
|
if (this.server == null) return;
|
||||||
List<String> l = new ArrayList<String>();
|
final List<String> l = new ArrayList<>();
|
||||||
for (String s: ids) l.add(s);
|
for (final String s: ids) l.add(s);
|
||||||
synchronized (this.server) {
|
synchronized (this.server) {
|
||||||
try {
|
try {
|
||||||
this.server.deleteById(l, -1);
|
this.server.deleteById(l, -1);
|
||||||
|
@ -247,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
@Override
|
@Override
|
||||||
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
|
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
|
||||||
if (this.server == null) return;
|
if (this.server == null) return;
|
||||||
for (SolrInputDocument solrdoc : solrdocs) {
|
for (final SolrInputDocument solrdoc : solrdocs) {
|
||||||
if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict"
|
if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict"
|
||||||
}
|
}
|
||||||
synchronized (this.server) {
|
synchronized (this.server) {
|
||||||
|
@ -278,8 +278,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
this.server.add(solrdocs, -1);
|
this.server.add(solrdocs, -1);
|
||||||
} catch (final Throwable ee) {
|
} catch (final Throwable ee) {
|
||||||
ConcurrentLog.logException(ee);
|
ConcurrentLog.logException(ee);
|
||||||
List<String> ids = new ArrayList<String>();
|
final List<String> ids = new ArrayList<>();
|
||||||
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
|
for (final SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
|
||||||
log.warn(e.getMessage() + " IDs=" + ids.toString());
|
log.warn(e.getMessage() + " IDs=" + ids.toString());
|
||||||
throw new IOException(ee);
|
throw new IOException(ee);
|
||||||
}
|
}
|
||||||
|
@ -300,11 +300,11 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException {
|
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException {
|
||||||
if (this.server == null) throw new IOException("server disconnected");
|
if (this.server == null) throw new IOException("server disconnected");
|
||||||
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
|
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
|
||||||
String q = params.get(CommonParams.Q);
|
final String q = params.get(CommonParams.Q);
|
||||||
String fq = params.get(CommonParams.FQ);
|
final String fq = params.get(CommonParams.FQ);
|
||||||
String sort = params.get(CommonParams.SORT);
|
final String sort = params.get(CommonParams.SORT);
|
||||||
String fl = params.get(CommonParams.FL);
|
final String fl = params.get(CommonParams.FL);
|
||||||
String threadname = Thread.currentThread().getName();
|
final String threadname = Thread.currentThread().getName();
|
||||||
QueryResponse rsp;
|
QueryResponse rsp;
|
||||||
int retry = 0;
|
int retry = 0;
|
||||||
Throwable error = null;
|
Throwable error = null;
|
||||||
|
@ -322,7 +322,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
clearCaches(); // prevent further OOM if this was caused by OOM
|
clearCaches(); // prevent further OOM if this was caused by OOM
|
||||||
}
|
}
|
||||||
ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq));
|
ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq));
|
||||||
try {Thread.sleep(1000);} catch (InterruptedException e) {}
|
try {Thread.sleep(1000);} catch (final InterruptedException e) {}
|
||||||
}
|
}
|
||||||
throw new IOException("Error executing query", error);
|
throw new IOException("Error executing query", error);
|
||||||
}
|
}
|
||||||
|
@ -342,10 +342,10 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
public int getSegmentCount() {
|
public int getSegmentCount() {
|
||||||
if (this.server == null) return 0;
|
if (this.server == null) return 0;
|
||||||
try {
|
try {
|
||||||
LukeResponse lukeResponse = getIndexBrowser(false);
|
final LukeResponse lukeResponse = getIndexBrowser(false);
|
||||||
NamedList<Object> info = lukeResponse.getIndexInfo();
|
final NamedList<Object> info = lukeResponse.getIndexInfo();
|
||||||
if (info == null) return 0;
|
if (info == null) return 0;
|
||||||
Integer segmentCount = (Integer) info.get("segmentCount");
|
final Integer segmentCount = (Integer) info.get("segmentCount");
|
||||||
if (segmentCount == null) return 1;
|
if (segmentCount == null) return 1;
|
||||||
return segmentCount.intValue();
|
return segmentCount.intValue();
|
||||||
} catch (final Throwable e) {
|
} catch (final Throwable e) {
|
||||||
|
@ -363,19 +363,19 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
if (this.server instanceof ServerShard) {
|
if (this.server instanceof ServerShard) {
|
||||||
// the server can be a single shard; we don't know here
|
// the server can be a single shard; we don't know here
|
||||||
// to test that, we submit requests to both variants
|
// to test that, we submit requests to both variants
|
||||||
if (useluke == 1) return getSizeLukeRequest();
|
if (this.useluke == 1) return getSizeLukeRequest();
|
||||||
if (useluke == -1) return getSizeQueryRequest();
|
if (this.useluke == -1) return getSizeQueryRequest();
|
||||||
long ls = getSizeLukeRequest();
|
final long ls = getSizeLukeRequest();
|
||||||
long qs = getSizeQueryRequest();
|
final long qs = getSizeQueryRequest();
|
||||||
if (ls == 0 && qs == 0) {
|
if (ls == 0 && qs == 0) {
|
||||||
// we don't know if this is caused by an error or not; don't change the useluke value
|
// we don't know if this is caused by an error or not; don't change the useluke value
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (ls == qs) {
|
if (ls == qs) {
|
||||||
useluke = 1;
|
this.useluke = 1;
|
||||||
return ls;
|
return ls;
|
||||||
}
|
}
|
||||||
useluke = -1;
|
this.useluke = -1;
|
||||||
return qs;
|
return qs;
|
||||||
}
|
}
|
||||||
return getSizeLukeRequest();
|
return getSizeLukeRequest();
|
||||||
|
@ -398,9 +398,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
private long getSizeLukeRequest() {
|
private long getSizeLukeRequest() {
|
||||||
if (this.server == null) return 0;
|
if (this.server == null) return 0;
|
||||||
try {
|
try {
|
||||||
LukeResponse lukeResponse = getIndexBrowser(false);
|
final LukeResponse lukeResponse = getIndexBrowser(false);
|
||||||
if (lukeResponse == null) return 0;
|
if (lukeResponse == null) return 0;
|
||||||
Integer numDocs = lukeResponse.getNumDocs();
|
final Integer numDocs = lukeResponse.getNumDocs();
|
||||||
if (numDocs == null) return 0;
|
if (numDocs == null) return 0;
|
||||||
return numDocs.longValue();
|
return numDocs.longValue();
|
||||||
} catch (final Throwable e) {
|
} catch (final Throwable e) {
|
||||||
|
@ -419,7 +419,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
||||||
LukeResponse lukeResponse = null;
|
LukeResponse lukeResponse = null;
|
||||||
try {
|
try {
|
||||||
lukeResponse = lukeRequest.process(this.server);
|
lukeResponse = lukeRequest.process(this.server);
|
||||||
} catch (IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new SolrServerException(e.getMessage());
|
throw new SolrServerException(e.getMessage());
|
||||||
}
|
}
|
||||||
return lukeResponse;
|
return lukeResponse;
|
||||||
|
|
|
@ -27,10 +27,6 @@ import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
import net.yacy.cora.document.encoding.ASCII;
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
|
||||||
import net.yacy.kelondro.util.MemoryControl;
|
|
||||||
|
|
||||||
import org.apache.solr.client.solrj.SolrClient;
|
import org.apache.solr.client.solrj.SolrClient;
|
||||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||||
import org.apache.solr.core.CoreContainer;
|
import org.apache.solr.core.CoreContainer;
|
||||||
|
@ -38,17 +34,21 @@ import org.apache.solr.core.SolrCore;
|
||||||
|
|
||||||
import com.google.common.io.Files;
|
import com.google.common.io.Files;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.encoding.ASCII;
|
||||||
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
|
import net.yacy.kelondro.util.MemoryControl;
|
||||||
|
|
||||||
public class EmbeddedInstance implements SolrInstance {
|
public class EmbeddedInstance implements SolrInstance {
|
||||||
|
|
||||||
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
|
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
|
||||||
// additionally, an optional solrcore.properties (or solrcore.x86.properties for 32bit systems) is copied
|
// additionally, an optional solrcore.properties (or solrcore.x86.properties for 32bit systems) is copied
|
||||||
private CoreContainer coreContainer;
|
private CoreContainer coreContainer;
|
||||||
private String defaultCoreName;
|
private final String defaultCoreName;
|
||||||
private SolrCore defaultCore;
|
private final SolrCore defaultCore;
|
||||||
private SolrClient defaultCoreServer;
|
private final SolrClient defaultCoreServer;
|
||||||
private File containerPath;
|
private final File containerPath;
|
||||||
private Map<String, SolrCore> cores;
|
private final Map<String, SolrCore> cores;
|
||||||
private Map<String, SolrClient> server;
|
private final Map<String, SolrClient> server;
|
||||||
|
|
||||||
public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException {
|
public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException {
|
||||||
super();
|
super();
|
||||||
|
@ -56,30 +56,30 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
this.containerPath = containerPath;
|
this.containerPath = containerPath;
|
||||||
|
|
||||||
// ensure that default core path exists
|
// ensure that default core path exists
|
||||||
File defaultCorePath = new File(containerPath, givenDefaultCoreName);
|
final File defaultCorePath = new File(containerPath, givenDefaultCoreName);
|
||||||
if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
|
if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
|
||||||
|
|
||||||
// migrate old conf directory
|
// migrate old conf directory
|
||||||
File oldConf = new File(containerPath, "conf");
|
final File oldConf = new File(containerPath, "conf");
|
||||||
File confDir = new File(defaultCorePath, "conf");
|
final File confDir = new File(defaultCorePath, "conf");
|
||||||
if (oldConf.exists()) oldConf.renameTo(confDir);
|
if (oldConf.exists()) oldConf.renameTo(confDir);
|
||||||
|
|
||||||
// migrate old data directory
|
// migrate old data directory
|
||||||
File oldData = new File(containerPath, "data");
|
final File oldData = new File(containerPath, "data");
|
||||||
File dataDir = new File(defaultCorePath, "data");
|
final File dataDir = new File(defaultCorePath, "data");
|
||||||
if (oldData.exists()) oldData.renameTo(dataDir);
|
if (oldData.exists()) oldData.renameTo(dataDir);
|
||||||
|
|
||||||
// create index subdirectory in data if it does not exist
|
// create index subdirectory in data if it does not exist
|
||||||
File indexDir = new File(dataDir, "index");
|
final File indexDir = new File(dataDir, "index");
|
||||||
if (!indexDir.exists()) indexDir.mkdirs();
|
if (!indexDir.exists()) indexDir.mkdirs();
|
||||||
|
|
||||||
// initialize the cores' configuration
|
// initialize the cores' configuration
|
||||||
for (String coreName: initializeCoreNames) {
|
for (final String coreName: initializeCoreNames) {
|
||||||
initializeCoreConf(solr_config, containerPath, coreName);
|
initializeCoreConf(solr_config, containerPath, coreName);
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize the coreContainer
|
// initialize the coreContainer
|
||||||
File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
|
final File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
|
||||||
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
|
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
|
||||||
if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile);
|
if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile);
|
||||||
|
|
||||||
|
@ -94,9 +94,9 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName);
|
this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName);
|
||||||
|
|
||||||
// initialize core cache
|
// initialize core cache
|
||||||
this.cores = new ConcurrentHashMap<String, SolrCore>();
|
this.cores = new ConcurrentHashMap<>();
|
||||||
this.cores.put(this.defaultCoreName, this.defaultCore);
|
this.cores.put(this.defaultCoreName, this.defaultCore);
|
||||||
this.server = new ConcurrentHashMap<String, SolrClient>();
|
this.server = new ConcurrentHashMap<>();
|
||||||
this.server.put(this.defaultCoreName, this.defaultCoreServer);
|
this.server.put(this.defaultCoreName, this.defaultCoreServer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -113,11 +113,11 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
|
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
|
||||||
|
|
||||||
// ensure that default core path exists
|
// ensure that default core path exists
|
||||||
File corePath = new File(containerPath, coreName);
|
final File corePath = new File(containerPath, coreName);
|
||||||
if (!corePath.exists()) corePath.mkdirs();
|
if (!corePath.exists()) corePath.mkdirs();
|
||||||
|
|
||||||
// check if core.properties exists in each path (that's new in Solr 5.0)
|
// check if core.properties exists in each path (that's new in Solr 5.0)
|
||||||
File core_properties = new File(corePath, "core.properties");
|
final File core_properties = new File(corePath, "core.properties");
|
||||||
if (!core_properties.exists()) {
|
if (!core_properties.exists()) {
|
||||||
// create the file
|
// create the file
|
||||||
try (
|
try (
|
||||||
|
@ -130,25 +130,25 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
fos.write(ASCII.getBytes("config=${solrconfig:solrconfig.xml}\n"));
|
fos.write(ASCII.getBytes("config=${solrconfig:solrconfig.xml}\n"));
|
||||||
fos.write(ASCII.getBytes("schema=${schema:schema.xml}\n"));
|
fos.write(ASCII.getBytes("schema=${schema:schema.xml}\n"));
|
||||||
fos.write(ASCII.getBytes("coreNodeName=${coreNodeName:}\n"));
|
fos.write(ASCII.getBytes("coreNodeName=${coreNodeName:}\n"));
|
||||||
} catch (IOException e) {
|
} catch (final IOException e) {
|
||||||
ConcurrentLog.logException(e);
|
ConcurrentLog.logException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensure necessary subpaths exist
|
// ensure necessary subpaths exist
|
||||||
File conf = new File(corePath, "conf");
|
final File conf = new File(corePath, "conf");
|
||||||
conf.mkdirs();
|
conf.mkdirs();
|
||||||
File data = new File(corePath, "data");
|
final File data = new File(corePath, "data");
|
||||||
data.mkdirs();
|
data.mkdirs();
|
||||||
|
|
||||||
// (over-)write configuration into conf path
|
// (over-)write configuration into conf path
|
||||||
File source, target;
|
File source, target;
|
||||||
for (String cf: confFiles) {
|
for (final String cf: confFiles) {
|
||||||
source = new File(solr_config, cf);
|
source = new File(solr_config, cf);
|
||||||
if (source.isDirectory()) {
|
if (source.isDirectory()) {
|
||||||
target = new File(conf, cf);
|
target = new File(conf, cf);
|
||||||
target.mkdirs();
|
target.mkdirs();
|
||||||
for (String cfl: source.list()) {
|
for (final String cfl: source.list()) {
|
||||||
try {
|
try {
|
||||||
Files.copy(new File(source, cfl), new File(target, cfl));
|
Files.copy(new File(source, cfl), new File(target, cfl));
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
|
@ -168,7 +168,7 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
|
|
||||||
// copy the solrcore.properties
|
// copy the solrcore.properties
|
||||||
// for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists
|
// for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists
|
||||||
String os = System.getProperty("os.arch");
|
final String os = System.getProperty("os.arch");
|
||||||
if (os.contains("64")) {
|
if (os.contains("64")) {
|
||||||
source = new File(solr_config, "solrcore.properties");
|
source = new File(solr_config, "solrcore.properties");
|
||||||
} else {
|
} else {
|
||||||
|
@ -242,7 +242,7 @@ public class EmbeddedInstance implements SolrInstance {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized void close() {
|
public synchronized void close() {
|
||||||
for (SolrCore core: cores.values()) core.close();
|
for (final SolrCore core: this.cores.values()) core.close();
|
||||||
if (this.coreContainer != null) try {
|
if (this.coreContainer != null) try {
|
||||||
this.coreContainer.shutdown();
|
this.coreContainer.shutdown();
|
||||||
this.coreContainer = null;
|
this.coreContainer = null;
|
||||||
|
|
|
@ -36,7 +36,6 @@ import java.util.Set;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
|
|
||||||
import net.yacy.cora.date.ISO8601Formatter;
|
import net.yacy.cora.date.ISO8601Formatter;
|
||||||
import net.yacy.cora.document.encoding.ASCII;
|
import net.yacy.cora.document.encoding.ASCII;
|
||||||
import net.yacy.cora.document.encoding.UTF8;
|
import net.yacy.cora.document.encoding.UTF8;
|
||||||
|
@ -424,7 +423,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
|
||||||
if (dbocc != null) {
|
if (dbocc != null) {
|
||||||
return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": " + dbocc.name();
|
return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": " + dbocc.name();
|
||||||
}
|
}
|
||||||
String urls = url.toNormalform(false);
|
final String urls = url.toNormalform(false);
|
||||||
final long oldDate = this.indexSegment.getLoadTime(url.hash());
|
final long oldDate = this.indexSegment.getLoadTime(url.hash());
|
||||||
|
|
||||||
// deny urls that exceed allowed number of occurrences
|
// deny urls that exceed allowed number of occurrences
|
||||||
|
@ -574,26 +573,6 @@ public final class CrawlStacker implements WorkflowTask<Request>{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Switchboard.getSwitchboard().getConfigBool(
|
|
||||||
"contentcontrol.enabled", false) == true) {
|
|
||||||
|
|
||||||
if (!Switchboard.getSwitchboard()
|
|
||||||
.getConfig("contentcontrol.mandatoryfilterlist", "")
|
|
||||||
.equals("")) {
|
|
||||||
final FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
|
|
||||||
if (f != null) {
|
|
||||||
if (!f.isListed(url, null)) {
|
|
||||||
|
|
||||||
return "the url '"
|
|
||||||
+ url
|
|
||||||
+ "' does not belong to the network mandatory filter list";
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
final boolean local = url.isLocal();
|
final boolean local = url.isLocal();
|
||||||
if (this.acceptLocalURLs && local) return null;
|
if (this.acceptLocalURLs && local) return null;
|
||||||
if (this.acceptGlobalURLs && !local) return null;
|
if (this.acceptGlobalURLs && !local) return null;
|
||||||
|
|
|
@ -114,8 +114,6 @@ import com.hazelcast.config.NetworkConfig;
|
||||||
import com.hazelcast.core.Hazelcast;
|
import com.hazelcast.core.Hazelcast;
|
||||||
import com.hazelcast.core.HazelcastInstance;
|
import com.hazelcast.core.HazelcastInstance;
|
||||||
|
|
||||||
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
|
|
||||||
import net.yacy.contentcontrol.SMWListSyncThread;
|
|
||||||
import net.yacy.cora.date.AbstractFormatter;
|
import net.yacy.cora.date.AbstractFormatter;
|
||||||
import net.yacy.cora.date.GenericFormatter;
|
import net.yacy.cora.date.GenericFormatter;
|
||||||
import net.yacy.cora.date.ISO8601Formatter;
|
import net.yacy.cora.date.ISO8601Formatter;
|
||||||
|
@ -501,8 +499,6 @@ public final class Switchboard extends serverSwitch {
|
||||||
// load the network definition
|
// load the network definition
|
||||||
try {
|
try {
|
||||||
this.overwriteNetworkDefinition(this.getSysinfo());
|
this.overwriteNetworkDefinition(this.getSysinfo());
|
||||||
} catch (final FileNotFoundException e) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
ConcurrentLog.logException(e);
|
ConcurrentLog.logException(e);
|
||||||
}
|
}
|
||||||
|
@ -1267,27 +1263,6 @@ public final class Switchboard extends serverSwitch {
|
||||||
Long.parseLong(this.getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")),
|
Long.parseLong(this.getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")),
|
||||||
Double.parseDouble(this.getConfig(SwitchboardConstants.INDEX_DIST_LOADPREREQ, "9.0")));
|
Double.parseDouble(this.getConfig(SwitchboardConstants.INDEX_DIST_LOADPREREQ, "9.0")));
|
||||||
|
|
||||||
// content control: initialize list sync thread
|
|
||||||
this.deployThread(
|
|
||||||
"720_ccimport",
|
|
||||||
"Content Control Import",
|
|
||||||
"this is the content control import thread",
|
|
||||||
null,
|
|
||||||
InstantBusyThread.createFromRunnable(
|
|
||||||
new SMWListSyncThread(this, sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"),
|
|
||||||
"Category:Content Source", "/?Url/?Filter/?Category/?Modification date",
|
|
||||||
sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)),
|
|
||||||
3000, 3000),
|
|
||||||
2000);
|
|
||||||
|
|
||||||
this.deployThread(
|
|
||||||
"730_ccfilter",
|
|
||||||
"Content Control Filter",
|
|
||||||
"this is the content control filter update thread",
|
|
||||||
null,
|
|
||||||
InstantBusyThread.createFromRunnable(new ContentControlFilterUpdateThread(this), 3000, 3000),
|
|
||||||
2000);
|
|
||||||
|
|
||||||
// set network-specific performance attributes
|
// set network-specific performance attributes
|
||||||
if ( this.firstInit ) {
|
if ( this.firstInit ) {
|
||||||
this.setRemotecrawlPPM(Math.max(1, (int) this.getConfigLong("network.unit.remotecrawl.speed", 60)));
|
this.setRemotecrawlPPM(Math.max(1, (int) this.getConfigLong("network.unit.remotecrawl.speed", 60)));
|
||||||
|
@ -2059,7 +2034,7 @@ public final class Switchboard extends serverSwitch {
|
||||||
if ( this.dhtDispatcher != null ) {
|
if ( this.dhtDispatcher != null ) {
|
||||||
this.dhtDispatcher.close();
|
this.dhtDispatcher.close();
|
||||||
}
|
}
|
||||||
// de.anomic.http.client.Client.closeAllConnections();
|
// de.anomic.http.client.Client.closeAllConnections();
|
||||||
this.wikiDB.close();
|
this.wikiDB.close();
|
||||||
this.blogDB.close();
|
this.blogDB.close();
|
||||||
this.blogCommentDB.close();
|
this.blogCommentDB.close();
|
||||||
|
@ -2243,8 +2218,6 @@ public final class Switchboard extends serverSwitch {
|
||||||
if ( gzfile.exists() ) {
|
if ( gzfile.exists() ) {
|
||||||
FileUtils.deletedelete(outfile);
|
FileUtils.deletedelete(outfile);
|
||||||
}
|
}
|
||||||
} catch (final FileNotFoundException e ) {
|
|
||||||
ConcurrentLog.logException(e);
|
|
||||||
} catch (final IOException e ) {
|
} catch (final IOException e ) {
|
||||||
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
|
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
|
||||||
ConcurrentLog.logException(e);
|
ConcurrentLog.logException(e);
|
||||||
|
@ -3112,8 +3085,6 @@ public final class Switchboard extends serverSwitch {
|
||||||
Document[] documents = null;
|
Document[] documents = null;
|
||||||
try {
|
try {
|
||||||
documents = this.parseDocument(in.queueEntry);
|
documents = this.parseDocument(in.queueEntry);
|
||||||
} catch (final InterruptedException e ) {
|
|
||||||
documents = null;
|
|
||||||
} catch (final Exception e ) {
|
} catch (final Exception e ) {
|
||||||
documents = null;
|
documents = null;
|
||||||
}
|
}
|
||||||
|
@ -4291,7 +4262,7 @@ public final class Switchboard extends serverSwitch {
|
||||||
this.log.info("dhtTransferJob: too many connections in httpc pool : "
|
this.log.info("dhtTransferJob: too many connections in httpc pool : "
|
||||||
+ ConnectionInfo.getCount());
|
+ ConnectionInfo.getCount());
|
||||||
// close unused connections
|
// close unused connections
|
||||||
// Client.cleanup();
|
// Client.cleanup();
|
||||||
} else if ( kbytesUp > 128 ) {
|
} else if ( kbytesUp > 128 ) {
|
||||||
this.log.info("dhtTransferJob: too much upload(1), currently uploading: " + kbytesUp + " Kb");
|
this.log.info("dhtTransferJob: too much upload(1), currently uploading: " + kbytesUp + " Kb");
|
||||||
} else {
|
} else {
|
||||||
|
@ -4331,7 +4302,7 @@ public final class Switchboard extends serverSwitch {
|
||||||
this.log.info("dhtTransferJob: too many connections in httpc pool : "
|
this.log.info("dhtTransferJob: too many connections in httpc pool : "
|
||||||
+ ConnectionInfo.getCount());
|
+ ConnectionInfo.getCount());
|
||||||
// close unused connections
|
// close unused connections
|
||||||
// Client.cleanup();
|
// Client.cleanup();
|
||||||
} else if ( kbytesUp > 256 ) {
|
} else if ( kbytesUp > 256 ) {
|
||||||
this.log.info("dhtTransferJob: too much upload(2), currently uploading: " + kbytesUp + " Kb");
|
this.log.info("dhtTransferJob: too much upload(2), currently uploading: " + kbytesUp + " Kb");
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -51,7 +51,6 @@ import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
|
||||||
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
|
|
||||||
import net.yacy.cora.date.ISO8601Formatter;
|
import net.yacy.cora.date.ISO8601Formatter;
|
||||||
import net.yacy.cora.document.analysis.Classification;
|
import net.yacy.cora.document.analysis.Classification;
|
||||||
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
import net.yacy.cora.document.analysis.Classification.ContentDomain;
|
||||||
|
@ -95,7 +94,6 @@ import net.yacy.peers.RemoteSearch;
|
||||||
import net.yacy.peers.SeedDB;
|
import net.yacy.peers.SeedDB;
|
||||||
import net.yacy.peers.graphics.ProfilingGraph;
|
import net.yacy.peers.graphics.ProfilingGraph;
|
||||||
import net.yacy.repository.Blacklist.BlacklistType;
|
import net.yacy.repository.Blacklist.BlacklistType;
|
||||||
import net.yacy.repository.FilterEngine;
|
|
||||||
import net.yacy.repository.LoaderDispatcher;
|
import net.yacy.repository.LoaderDispatcher;
|
||||||
import net.yacy.search.EventTracker;
|
import net.yacy.search.EventTracker;
|
||||||
import net.yacy.search.Switchboard;
|
import net.yacy.search.Switchboard;
|
||||||
|
@ -186,9 +184,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
/** a set of words that are used to match with the snippets */
|
/** a set of words that are used to match with the snippets */
|
||||||
private final Set<String> snippetFetchWords;
|
private final Set<String> snippetFetchWords;
|
||||||
private final boolean deleteIfSnippetFail;
|
private final boolean deleteIfSnippetFail;
|
||||||
private long urlRetrievalAllTime;
|
private final long urlRetrievalAllTime;
|
||||||
private long snippetComputationAllTime;
|
private final long snippetComputationAllTime;
|
||||||
private ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
|
private final ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
|
||||||
private final boolean remote;
|
private final boolean remote;
|
||||||
|
|
||||||
/** add received results to local index (default=true) */
|
/** add received results to local index (default=true) */
|
||||||
|
@ -283,7 +281,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
return Math.max(
|
return Math.max(
|
||||||
this.local_rwi_available.get() + this.remote_rwi_available.get() +
|
this.local_rwi_available.get() + this.remote_rwi_available.get() +
|
||||||
this.remote_solr_available.get() + Math.max(0, this.local_solr_stored.get() - this.local_solr_evicted.get()),
|
this.remote_solr_available.get() + Math.max(0, this.local_solr_stored.get() - this.local_solr_evicted.get()),
|
||||||
imageViewed.size() + sizeSpare()
|
this.imageViewed.size() + sizeSpare()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -324,17 +322,17 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
long ab = MemoryControl.available();
|
long ab = MemoryControl.available();
|
||||||
if (ab < 1024 * 1024 * 200) {
|
if (ab < 1024 * 1024 * 200) {
|
||||||
int eb = SearchEventCache.size();
|
final int eb = SearchEventCache.size();
|
||||||
SearchEventCache.cleanupEvents(false);
|
SearchEventCache.cleanupEvents(false);
|
||||||
int en = SearchEventCache.size();
|
final int en = SearchEventCache.size();
|
||||||
if (en < eb) {
|
if (en < eb) {
|
||||||
log.info("Cleaned up search event cache (1) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
|
log.info("Cleaned up search event cache (1) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ab = MemoryControl.available();
|
ab = MemoryControl.available();
|
||||||
int eb = SearchEventCache.size();
|
final int eb = SearchEventCache.size();
|
||||||
SearchEventCache.cleanupEvents(Math.max(1, (int) (MemoryControl.available() / (1024 * 1024 * 120))));
|
SearchEventCache.cleanupEvents(Math.max(1, (int) (MemoryControl.available() / (1024 * 1024 * 120))));
|
||||||
int en = SearchEventCache.size();
|
final int en = SearchEventCache.size();
|
||||||
if (en < eb) {
|
if (en < eb) {
|
||||||
log.info("Cleaned up search event cache (2) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
|
log.info("Cleaned up search event cache (2) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
|
||||||
}
|
}
|
||||||
|
@ -348,7 +346,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.imagePageCounter = query.offset;
|
this.imagePageCounter = query.offset;
|
||||||
}
|
}
|
||||||
this.loader = loader;
|
this.loader = loader;
|
||||||
this.nodeStack = new WeakPriorityBlockingQueue<URIMetadataNode>(max_results_node, false);
|
this.nodeStack = new WeakPriorityBlockingQueue<>(max_results_node, false);
|
||||||
this.maxExpectedRemoteReferences = new AtomicInteger(0);
|
this.maxExpectedRemoteReferences = new AtomicInteger(0);
|
||||||
this.expectedRemoteReferences = new AtomicInteger(0);
|
this.expectedRemoteReferences = new AtomicInteger(0);
|
||||||
this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true);
|
this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true);
|
||||||
|
@ -377,7 +375,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap<>(this) : null;
|
this.protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap<>(this) : null;
|
||||||
this.dateNavigator = dateNavEnabled ? new ConcurrentScoreMap<>(this) : null;
|
this.dateNavigator = dateNavEnabled ? new ConcurrentScoreMap<>(this) : null;
|
||||||
this.topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0;
|
this.topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0;
|
||||||
this.vocabularyNavigator = new TreeMap<String, ScoreMap<String>>();
|
this.vocabularyNavigator = new TreeMap<>();
|
||||||
// prepare configured search navigation (plugins)
|
// prepare configured search navigation (plugins)
|
||||||
this.navigatorPlugins = NavigatorPlugins.initFromCfgStrings(navConfigs);
|
this.navigatorPlugins = NavigatorPlugins.initFromCfgStrings(navConfigs);
|
||||||
if(this.navigatorPlugins != null) {
|
if(this.navigatorPlugins != null) {
|
||||||
|
@ -386,14 +384,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this.snippets = new ConcurrentHashMap<String, LinkedHashSet<String>>();
|
this.snippets = new ConcurrentHashMap<>();
|
||||||
this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
|
this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
|
||||||
if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
|
if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
|
||||||
this.secondarySearchThreads = null;
|
this.secondarySearchThreads = null;
|
||||||
this.preselectedPeerHashes = preselectedPeerHashes;
|
this.preselectedPeerHashes = preselectedPeerHashes;
|
||||||
this.IAResults = new TreeMap<byte[], String>(Base64Order.enhancedCoder);
|
this.IAResults = new TreeMap<>(Base64Order.enhancedCoder);
|
||||||
this.IACount = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder);
|
this.IACount = new TreeMap<>(Base64Order.enhancedCoder);
|
||||||
this.heuristics = new TreeMap<byte[], HeuristicResult>(Base64Order.enhancedCoder);
|
this.heuristics = new TreeMap<>(Base64Order.enhancedCoder);
|
||||||
this.IAmaxcounthash = null;
|
this.IAmaxcounthash = null;
|
||||||
this.IAneardhthash = null;
|
this.IAneardhthash = null;
|
||||||
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
|
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
|
||||||
|
@ -420,10 +418,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
// attention: if minEntries is too high, this method will not terminate within the maxTime
|
||||||
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
// sortorder: 0 = hash, 1 = url, 2 = ranking
|
||||||
this.localSearchInclusion = null;
|
this.localSearchInclusion = null;
|
||||||
this.ref = new ConcurrentScoreMap<String>(this);
|
this.ref = new ConcurrentScoreMap<>(this);
|
||||||
this.maxtime = query.maxtime;
|
this.maxtime = query.maxtime;
|
||||||
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
|
this.rwiStack = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
|
||||||
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>();
|
this.doubleDomCache = new ConcurrentHashMap<>();
|
||||||
this.flagcount = new int[32];
|
this.flagcount = new int[32];
|
||||||
for ( int i = 0; i < 32; i++ ) {
|
for ( int i = 0; i < 32; i++ ) {
|
||||||
this.flagcount[i] = 0;
|
this.flagcount[i] = 0;
|
||||||
|
@ -435,8 +433,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.receivedRemoteReferences = new AtomicInteger(0);
|
this.receivedRemoteReferences = new AtomicInteger(0);
|
||||||
this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang);
|
this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang);
|
||||||
this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100);
|
this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100);
|
||||||
this.taggingPredicates = new HashMap<String, String>();
|
this.taggingPredicates = new HashMap<>();
|
||||||
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) {
|
for (final Tagging t: LibraryProvider.autotagging.getVocabularies()) {
|
||||||
this.taggingPredicates.put(t.getName(), t.getPredicate());
|
this.taggingPredicates.put(t.getName(), t.getPredicate());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -453,8 +451,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.rwiProcess = null;
|
this.rwiProcess = null;
|
||||||
if (query.getSegment().connectedRWI() && !Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_DHT_OFF, false)) {
|
if (query.getSegment().connectedRWI() && !Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_DHT_OFF, false)) {
|
||||||
// we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old
|
// we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old
|
||||||
rwiProcess = new RWIProcess(this.localsolrsearch);
|
this.rwiProcess = new RWIProcess(this.localsolrsearch);
|
||||||
rwiProcess.start();
|
this.rwiProcess.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.remote) {
|
if (this.remote) {
|
||||||
|
@ -465,8 +463,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.primarySearchThreadsL = null;
|
this.primarySearchThreadsL = null;
|
||||||
this.nodeSearchThreads = null;
|
this.nodeSearchThreads = null;
|
||||||
} else {
|
} else {
|
||||||
this.primarySearchThreadsL = new ArrayList<RemoteSearch>();
|
this.primarySearchThreadsL = new ArrayList<>();
|
||||||
this.nodeSearchThreads = new ArrayList<Thread>();
|
this.nodeSearchThreads = new ArrayList<>();
|
||||||
// start this concurrently because the remote search needs an enumeration
|
// start this concurrently because the remote search needs an enumeration
|
||||||
// of the remote peers which may block in some cases when e.g. DHT is active
|
// of the remote peers which may block in some cases when e.g. DHT is active
|
||||||
// at the same time.
|
// at the same time.
|
||||||
|
@ -502,7 +500,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
if ( generateAbstracts ) {
|
if ( generateAbstracts ) {
|
||||||
// we need the results now
|
// we need the results now
|
||||||
try {
|
try {
|
||||||
if (rwiProcess != null && query.getSegment().connectedRWI()) rwiProcess.join();
|
if (this.rwiProcess != null && query.getSegment().connectedRWI()) this.rwiProcess.join();
|
||||||
} catch (final Throwable e ) {
|
} catch (final Throwable e ) {
|
||||||
}
|
}
|
||||||
// compute index abstracts
|
// compute index abstracts
|
||||||
|
@ -535,7 +533,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// give process time to accumulate a certain amount of data
|
// give process time to accumulate a certain amount of data
|
||||||
// before a reading process wants to get results from it
|
// before a reading process wants to get results from it
|
||||||
try {
|
try {
|
||||||
if (rwiProcess != null && query.getSegment().connectedRWI() && rwiProcess.isAlive()) rwiProcess.join(100);
|
if (this.rwiProcess != null && query.getSegment().connectedRWI() && this.rwiProcess.isAlive()) this.rwiProcess.join(100);
|
||||||
} catch (final Throwable e ) {
|
} catch (final Throwable e ) {
|
||||||
}
|
}
|
||||||
// this will reduce the maximum waiting time until results are available to 100 milliseconds
|
// this will reduce the maximum waiting time until results are available to 100 milliseconds
|
||||||
|
@ -547,14 +545,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.deleteIfSnippetFail = deleteIfSnippetFail;
|
this.deleteIfSnippetFail = deleteIfSnippetFail;
|
||||||
this.urlRetrievalAllTime = 0;
|
this.urlRetrievalAllTime = 0;
|
||||||
this.snippetComputationAllTime = 0;
|
this.snippetComputationAllTime = 0;
|
||||||
this.resultList = new WeakPriorityBlockingQueue<URIMetadataNode>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
|
this.resultList = new WeakPriorityBlockingQueue<>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
|
||||||
|
|
||||||
// snippets do not need to match with the complete query hashes,
|
// snippets do not need to match with the complete query hashes,
|
||||||
// only with the query minus the stopwords which had not been used for the search
|
// only with the query minus the stopwords which had not been used for the search
|
||||||
boolean filtered = false;
|
boolean filtered = false;
|
||||||
// check if query contains stopword
|
// check if query contains stopword
|
||||||
if (Switchboard.stopwordHashes != null) {
|
if (Switchboard.stopwordHashes != null) {
|
||||||
Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator();
|
final Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator();
|
||||||
while (it.hasNext()) {
|
while (it.hasNext()) {
|
||||||
if (Switchboard.stopwordHashes.contains((it.next()))) {
|
if (Switchboard.stopwordHashes.contains((it.next()))) {
|
||||||
filtered = true;
|
filtered = true;
|
||||||
|
@ -600,7 +598,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
|
|
||||||
if (query.getSegment().termIndex() == null) return; // nothing to do; this index is not used
|
if (SearchEvent.this.query.getSegment().termIndex() == null) return; // nothing to do; this index is not used
|
||||||
|
|
||||||
// do a search
|
// do a search
|
||||||
oneFeederStarted();
|
oneFeederStarted();
|
||||||
|
@ -634,7 +632,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
SearchEvent.this.query.modifier.sitehost != null && SearchEvent.this.query.modifier.sitehost.length() > 0
|
SearchEvent.this.query.modifier.sitehost != null && SearchEvent.this.query.modifier.sitehost.length() > 0
|
||||||
) {
|
) {
|
||||||
// try again with sitehost
|
// try again with sitehost
|
||||||
String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost);
|
final String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost);
|
||||||
search =
|
search =
|
||||||
SearchEvent.this.query
|
SearchEvent.this.query
|
||||||
.getSegment()
|
.getSegment()
|
||||||
|
@ -695,7 +693,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// normalize entries
|
// normalize entries
|
||||||
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
|
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
|
||||||
int is = index.size();
|
final int is = index.size();
|
||||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
|
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
|
||||||
this.query.id(true),
|
this.query.id(true),
|
||||||
SearchEventType.NORMALIZING,
|
SearchEventType.NORMALIZING,
|
||||||
|
@ -708,7 +706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
timer = System.currentTimeMillis();
|
timer = System.currentTimeMillis();
|
||||||
|
|
||||||
// apply all constraints
|
// apply all constraints
|
||||||
long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
|
final long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
|
||||||
int successcounter = 0;
|
int successcounter = 0;
|
||||||
try {
|
try {
|
||||||
WordReferenceVars iEntry;
|
WordReferenceVars iEntry;
|
||||||
|
@ -716,7 +714,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
String acceptableAlternativeSitehash = null;
|
String acceptableAlternativeSitehash = null;
|
||||||
if (this.query.modifier.sitehost != null && this.query.modifier.sitehost.length() > 0) try {
|
if (this.query.modifier.sitehost != null && this.query.modifier.sitehost.length() > 0) try {
|
||||||
acceptableAlternativeSitehash = DigestURL.hosthash(this.query.modifier.sitehost.startsWith("www.") ? this.query.modifier.sitehost.substring(4) : "www." + this.query.modifier.sitehost, 80);
|
acceptableAlternativeSitehash = DigestURL.hosthash(this.query.modifier.sitehost.startsWith("www.") ? this.query.modifier.sitehost.substring(4) : "www." + this.query.modifier.sitehost, 80);
|
||||||
} catch (MalformedURLException e1) {}
|
} catch (final MalformedURLException e1) {}
|
||||||
pollloop: while ( true ) {
|
pollloop: while ( true ) {
|
||||||
remaining = timeout - System.currentTimeMillis();
|
remaining = timeout - System.currentTimeMillis();
|
||||||
if (remaining <= 0) {
|
if (remaining <= 0) {
|
||||||
|
@ -740,7 +738,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// increase flag counts
|
// increase flag counts
|
||||||
Bitfield flags = iEntry.flags();
|
final Bitfield flags = iEntry.flags();
|
||||||
for (int j = 0; j < 32; j++) {
|
for (int j = 0; j < 32; j++) {
|
||||||
if (flags.get(j)) this.flagcount[j]++;
|
if (flags.get(j)) this.flagcount[j]++;
|
||||||
}
|
}
|
||||||
|
@ -806,7 +804,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
this.urlhashes.putUnique(iEntry.urlhash());
|
this.urlhashes.putUnique(iEntry.urlhash());
|
||||||
rankingtryloop: while (true) {
|
rankingtryloop: while (true) {
|
||||||
try {
|
try {
|
||||||
this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
|
this.rwiStack.put(new ReverseElement<>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
|
||||||
break rankingtryloop;
|
break rankingtryloop;
|
||||||
} catch (final ArithmeticException e ) {
|
} catch (final ArithmeticException e ) {
|
||||||
// this may happen if the concurrent normalizer changes values during cardinal computation
|
// this may happen if the concurrent normalizer changes values during cardinal computation
|
||||||
|
@ -821,8 +819,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
if (System.currentTimeMillis() >= timeout) ConcurrentLog.warn("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
|
if (System.currentTimeMillis() >= timeout) ConcurrentLog.warn("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
|
||||||
|
|
||||||
} catch (final InterruptedException e ) {
|
} catch (final InterruptedException | SpaceExceededException e ) {
|
||||||
} catch (final SpaceExceededException e ) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
|
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
|
||||||
|
@ -847,7 +844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// stop all threads
|
// stop all threads
|
||||||
if (this.localsolrsearch != null) {
|
if (this.localsolrsearch != null) {
|
||||||
if (localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();}
|
if (this.localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();}
|
||||||
}
|
}
|
||||||
if (this.nodeSearchThreads != null) {
|
if (this.nodeSearchThreads != null) {
|
||||||
for (final Thread search : this.nodeSearchThreads) {
|
for (final Thread search : this.nodeSearchThreads) {
|
||||||
|
@ -969,7 +966,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
long timer = System.currentTimeMillis();
|
long timer = System.currentTimeMillis();
|
||||||
|
|
||||||
// normalize entries
|
// normalize entries
|
||||||
int is = nodeList.size();
|
final int is = nodeList.size();
|
||||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.NORMALIZING, resourceName, is, System.currentTimeMillis() - timer), false);
|
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.NORMALIZING, resourceName, is, System.currentTimeMillis() - timer), false);
|
||||||
if (!local) {
|
if (!local) {
|
||||||
this.receivedRemoteReferences.addAndGet(is);
|
this.receivedRemoteReferences.addAndGet(is);
|
||||||
|
@ -985,7 +982,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// apply all constraints
|
// apply all constraints
|
||||||
try {
|
try {
|
||||||
pollloop: for (URIMetadataNode iEntry: nodeList) {
|
pollloop: for (final URIMetadataNode iEntry: nodeList) {
|
||||||
|
|
||||||
// check url related eventual constraints (protocol, tld, sitehost, and filetype)
|
// check url related eventual constraints (protocol, tld, sitehost, and filetype)
|
||||||
final String matchingResult = QueryParams.matchesURL(this.query.modifier, this.query.tld, iEntry.url());
|
final String matchingResult = QueryParams.matchesURL(this.query.modifier, this.query.tld, iEntry.url());
|
||||||
|
@ -1019,7 +1016,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check constraints
|
// check constraints
|
||||||
Bitfield flags = iEntry.flags();
|
final Bitfield flags = iEntry.flags();
|
||||||
if (!this.testFlags(flags)) {
|
if (!this.testFlags(flags)) {
|
||||||
if (log.isFine()) log.fine("dropped Node: flag test");
|
if (log.isFine()) log.fine("dropped Node: flag test");
|
||||||
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
|
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
|
||||||
|
@ -1049,7 +1046,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// filter out media links in text search, if wanted
|
// filter out media links in text search, if wanted
|
||||||
String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName());
|
final String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName());
|
||||||
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
|
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
|
||||||
if (log.isFine()) log.fine("dropped Node: file name domain does not match");
|
if (log.isFine()) log.fine("dropped Node: file name domain does not match");
|
||||||
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
|
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
|
||||||
|
@ -1097,12 +1094,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
long score;
|
long score;
|
||||||
// determine nodestack ranking (will be altered by postranking)
|
// determine nodestack ranking (will be altered by postranking)
|
||||||
// so far Solr score is used (with arbitrary factor to get value similar to rwi ranking values)
|
// so far Solr score is used (with arbitrary factor to get value similar to rwi ranking values)
|
||||||
Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
|
final Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
|
||||||
if (scorex != null && scorex > 0)
|
if (scorex != null && scorex > 0)
|
||||||
score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
|
score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
|
||||||
else
|
else
|
||||||
score = this.order.cardinal(iEntry);
|
score = this.order.cardinal(iEntry);
|
||||||
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
|
this.nodeStack.put(new ReverseElement<>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
|
||||||
break rankingtryloop;
|
break rankingtryloop;
|
||||||
} catch (final ArithmeticException e ) {
|
} catch (final ArithmeticException e ) {
|
||||||
// this may happen if the concurrent normalizer changes values during cardinal computation
|
// this may happen if the concurrent normalizer changes values during cardinal computation
|
||||||
|
@ -1131,8 +1128,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
private void incrNavigatorsFromSolrFacets(final Map<String, ReversibleScoreMap<String>> facets) {
|
private void incrNavigatorsFromSolrFacets(final Map<String, ReversibleScoreMap<String>> facets) {
|
||||||
if(facets != null && !facets.isEmpty()) {
|
if(facets != null && !facets.isEmpty()) {
|
||||||
/* Iterate over active navigator plugins to let them update the counters */
|
/* Iterate over active navigator plugins to let them update the counters */
|
||||||
for (String s : this.navigatorPlugins.keySet()) {
|
for (final String s : this.navigatorPlugins.keySet()) {
|
||||||
Navigator navi = this.navigatorPlugins.get(s);
|
final Navigator navi = this.navigatorPlugins.get(s);
|
||||||
if (navi != null) {
|
if (navi != null) {
|
||||||
navi.incFacet(facets);
|
navi.incFacet(facets);
|
||||||
}
|
}
|
||||||
|
@ -1144,8 +1141,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
* is expressed as a spatial filter not producing facets counts (see QueryParams.getFacetsFilterQueries()). */
|
* is expressed as a spatial filter not producing facets counts (see QueryParams.getFacetsFilterQueries()). */
|
||||||
fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
|
fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
|
||||||
if (fcts != null) {
|
if (fcts != null) {
|
||||||
for (String coordinate: fcts) {
|
for (final String coordinate: fcts) {
|
||||||
int hc = fcts.get(coordinate);
|
final int hc = fcts.get(coordinate);
|
||||||
if (hc == 0) continue;
|
if (hc == 0) continue;
|
||||||
this.locationNavigator.inc(coordinate, hc);
|
this.locationNavigator.inc(coordinate, hc);
|
||||||
}
|
}
|
||||||
|
@ -1161,9 +1158,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName());
|
fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName());
|
||||||
if (fcts != null) {
|
if (fcts != null) {
|
||||||
// remove all protocols that we don't know
|
// remove all protocols that we don't know
|
||||||
Iterator<String> i = fcts.iterator();
|
final Iterator<String> i = fcts.iterator();
|
||||||
while (i.hasNext()) {
|
while (i.hasNext()) {
|
||||||
String protocol = i.next();
|
final String protocol = i.next();
|
||||||
if (PROTOCOL_NAVIGATOR_SUPPORTED_VALUES.indexOf(protocol) < 0) {
|
if (PROTOCOL_NAVIGATOR_SUPPORTED_VALUES.indexOf(protocol) < 0) {
|
||||||
i.remove();
|
i.remove();
|
||||||
}
|
}
|
||||||
|
@ -1173,15 +1170,15 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the vocabulary navigation
|
// get the vocabulary navigation
|
||||||
Set<String> genericFacets = new LinkedHashSet<>();
|
final Set<String> genericFacets = new LinkedHashSet<>();
|
||||||
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName());
|
for (final Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName());
|
||||||
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
||||||
for (String vocName: genericFacets) {
|
for (final String vocName: genericFacets) {
|
||||||
fcts = facets.get(CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
|
fcts = facets.get(CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
|
||||||
if (fcts != null) {
|
if (fcts != null) {
|
||||||
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
||||||
if (vocNav == null) {
|
if (vocNav == null) {
|
||||||
vocNav = new ConcurrentScoreMap<String>();
|
vocNav = new ConcurrentScoreMap<>();
|
||||||
this.vocabularyNavigator.put(vocName, vocNav);
|
this.vocabularyNavigator.put(vocName, vocNav);
|
||||||
}
|
}
|
||||||
vocNav.inc(fcts);
|
vocNav.inc(fcts);
|
||||||
|
@ -1199,8 +1196,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
final Map<String, ReversibleScoreMap<String>> facets) {
|
final Map<String, ReversibleScoreMap<String>> facets) {
|
||||||
|
|
||||||
/* Iterate over active navigator plugins to let them update the counters */
|
/* Iterate over active navigator plugins to let them update the counters */
|
||||||
for (String s : this.navigatorPlugins.keySet()) {
|
for (final String s : this.navigatorPlugins.keySet()) {
|
||||||
Navigator navi = this.navigatorPlugins.get(s);
|
final Navigator navi = this.navigatorPlugins.get(s);
|
||||||
if (navi != null && facets == null || !facets.containsKey(navi.getIndexFieldName())) {
|
if (navi != null && facets == null || !facets.containsKey(navi.getIndexFieldName())) {
|
||||||
navi.incDoc(doc);
|
navi.incDoc(doc);
|
||||||
}
|
}
|
||||||
|
@ -1211,7 +1208,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
if (this.dateNavigator != null) {
|
if (this.dateNavigator != null) {
|
||||||
if (facets == null || !facets.containsKey(CollectionSchema.dates_in_content_dts.getSolrFieldName())) {
|
if (facets == null || !facets.containsKey(CollectionSchema.dates_in_content_dts.getSolrFieldName())) {
|
||||||
Date[] dates = doc.datesInContent();
|
final Date[] dates = doc.datesInContent();
|
||||||
if (dates != null) {
|
if (dates != null) {
|
||||||
for (final Date date : dates) {
|
for (final Date date : dates) {
|
||||||
if (date != null) {
|
if (date != null) {
|
||||||
|
@ -1234,12 +1231,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// get the vocabulary navigation
|
// get the vocabulary navigation
|
||||||
if(this.vocabularyNavigator != null) {
|
if(this.vocabularyNavigator != null) {
|
||||||
Set<String> genericFacets = new LinkedHashSet<>();
|
final Set<String> genericFacets = new LinkedHashSet<>();
|
||||||
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
||||||
genericFacets.add(v.getName());
|
genericFacets.add(v.getName());
|
||||||
}
|
}
|
||||||
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
||||||
for (String vocName : genericFacets) {
|
for (final String vocName : genericFacets) {
|
||||||
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX;
|
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX;
|
||||||
if (facets == null || !facets.containsKey(fieldName)) {
|
if (facets == null || !facets.containsKey(fieldName)) {
|
||||||
incrementVocNavigator(doc, vocName, fieldName);
|
incrementVocNavigator(doc, vocName, fieldName);
|
||||||
|
@ -1259,7 +1256,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
if(docValue instanceof String) {
|
if(docValue instanceof String) {
|
||||||
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
||||||
if (vocNav == null) {
|
if (vocNav == null) {
|
||||||
vocNav = new ConcurrentScoreMap<String>();
|
vocNav = new ConcurrentScoreMap<>();
|
||||||
this.vocabularyNavigator.put(vocName, vocNav);
|
this.vocabularyNavigator.put(vocName, vocNav);
|
||||||
}
|
}
|
||||||
vocNav.inc((String)docValue);
|
vocNav.inc((String)docValue);
|
||||||
|
@ -1267,7 +1264,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
if (!((Collection<?>) docValue).isEmpty()) {
|
if (!((Collection<?>) docValue).isEmpty()) {
|
||||||
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
||||||
if (vocNav == null) {
|
if (vocNav == null) {
|
||||||
vocNav = new ConcurrentScoreMap<String>();
|
vocNav = new ConcurrentScoreMap<>();
|
||||||
this.vocabularyNavigator.put(vocName, vocNav);
|
this.vocabularyNavigator.put(vocName, vocNav);
|
||||||
}
|
}
|
||||||
for (final Object singleDocValue : (Collection<?>) docValue) {
|
for (final Object singleDocValue : (Collection<?>) docValue) {
|
||||||
|
@ -1306,7 +1303,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
rwi = this.rwiStack.poll();
|
rwi = this.rwiStack.poll();
|
||||||
if (rwi == null) return null;
|
if (rwi == null) return null;
|
||||||
if (!skipDoubleDom) {
|
if (!skipDoubleDom) {
|
||||||
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
|
final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
decrementCounts(rwi.getElement());
|
decrementCounts(rwi.getElement());
|
||||||
continue pollloop;
|
continue pollloop;
|
||||||
|
@ -1322,9 +1319,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
m = this.doubleDomCache.get(hosthash);
|
m = this.doubleDomCache.get(hosthash);
|
||||||
if (m == null) {
|
if (m == null) {
|
||||||
// first appearance of dom. we create an entry to signal that one of that domain was already returned
|
// first appearance of dom. we create an entry to signal that one of that domain was already returned
|
||||||
m = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false);
|
m = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
|
||||||
this.doubleDomCache.put(hosthash, m);
|
this.doubleDomCache.put(hosthash, m);
|
||||||
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
|
final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
decrementCounts(rwi.getElement());
|
decrementCounts(rwi.getElement());
|
||||||
continue pollloop;
|
continue pollloop;
|
||||||
|
@ -1390,7 +1387,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
URIMetadataNode node = null;
|
URIMetadataNode node = null;
|
||||||
try {
|
try {
|
||||||
node = this.query.getSegment().fulltext().getMetadata(bestEntry);
|
node = this.query.getSegment().fulltext().getMetadata(bestEntry);
|
||||||
} catch (Throwable e) {
|
} catch (final Throwable e) {
|
||||||
ConcurrentLog.logException(e);
|
ConcurrentLog.logException(e);
|
||||||
}
|
}
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
|
@ -1442,7 +1439,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check content domain
|
// check content domain
|
||||||
ContentDomain contentDomain = page.getContentDomain();
|
final ContentDomain contentDomain = page.getContentDomain();
|
||||||
if (this.query.contentdom.getCode() > 0 && (
|
if (this.query.contentdom.getCode() > 0 && (
|
||||||
(this.query.contentdom == Classification.ContentDomain.IMAGE && contentDomain != Classification.ContentDomain.IMAGE) ||
|
(this.query.contentdom == Classification.ContentDomain.IMAGE && contentDomain != Classification.ContentDomain.IMAGE) ||
|
||||||
(this.query.contentdom == Classification.ContentDomain.AUDIO && contentDomain != Classification.ContentDomain.AUDIO) ||
|
(this.query.contentdom == Classification.ContentDomain.AUDIO && contentDomain != Classification.ContentDomain.AUDIO) ||
|
||||||
|
@ -1454,7 +1451,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// filter out media links in text search, if wanted
|
// filter out media links in text search, if wanted
|
||||||
String ext = MultiProtocolURL.getFileExtension(page.url().getFileName());
|
final String ext = MultiProtocolURL.getFileExtension(page.url().getFileName());
|
||||||
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
|
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
|
||||||
if (log.isFine()) log.fine("dropped RWI: file name domain does not match");
|
if (log.isFine()) log.fine("dropped RWI: file name domain does not match");
|
||||||
continue;
|
continue;
|
||||||
|
@ -1480,7 +1477,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// check modifier constraint collection
|
// check modifier constraint collection
|
||||||
// this is not available in pure RWI entries (but in local or via solr query received metadata/entries),
|
// this is not available in pure RWI entries (but in local or via solr query received metadata/entries),
|
||||||
if (this.query.modifier.collection != null) {
|
if (this.query.modifier.collection != null) {
|
||||||
Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value
|
final Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value
|
||||||
if (docCols == null) { // no collection info
|
if (docCols == null) { // no collection info
|
||||||
decrementCounts(page.word());
|
decrementCounts(page.word());
|
||||||
continue;
|
continue;
|
||||||
|
@ -1504,16 +1501,6 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// content control
|
|
||||||
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) {
|
|
||||||
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
|
|
||||||
if (f != null && !f.isListed(page.url(), null)) {
|
|
||||||
if (log.isFine()) log.fine("dropped RWI: url is blacklisted in contentcontrol");
|
|
||||||
decrementCounts(page.word());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final String pageurl = page.url().toNormalform(true);
|
final String pageurl = page.url().toNormalform(true);
|
||||||
final String pageauthor = page.dc_creator();
|
final String pageauthor = page.dc_creator();
|
||||||
final String pagetitle = page.dc_title().toLowerCase();
|
final String pagetitle = page.dc_title().toLowerCase();
|
||||||
|
@ -1551,9 +1538,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// check geo coordinates
|
// check geo coordinates
|
||||||
double lat, lon;
|
double lat, lon;
|
||||||
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) {
|
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) {
|
||||||
double latDelta = this.query.lat - lat;
|
final double latDelta = this.query.lat - lat;
|
||||||
double lonDelta = this.query.lon - lon;
|
final double lonDelta = this.query.lon - lon;
|
||||||
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
|
final double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
|
||||||
if (distance > this.query.radius) {
|
if (distance > this.query.radius) {
|
||||||
if (log.isFine()) log.fine("dropped RWI: radius constraint");
|
if (log.isFine()) log.fine("dropped RWI: radius constraint");
|
||||||
decrementCounts(page.word());
|
decrementCounts(page.word());
|
||||||
|
@ -1564,10 +1551,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field}
|
// check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field}
|
||||||
// TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL)
|
// TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL)
|
||||||
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
|
if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
|
||||||
tagloop: for (Tagging.Metatag tag : this.query.metatags) {
|
tagloop: for (final Tagging.Metatag tag : this.query.metatags) {
|
||||||
SolrDocument sdoc = page;
|
final SolrDocument sdoc = page;
|
||||||
if (sdoc != null) {
|
if (sdoc != null) {
|
||||||
Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
|
final Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
|
||||||
if (tagvalues != null && tagvalues.contains(tag.getObject())) {
|
if (tagvalues != null && tagvalues.contains(tag.getObject())) {
|
||||||
continue tagloop; // metatag exists check next tag (filter may consist of several tags)
|
continue tagloop; // metatag exists check next tag (filter may consist of several tags)
|
||||||
}
|
}
|
||||||
|
@ -1582,8 +1569,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// TODO: it may be a little bit late here, to update navigator counters
|
// TODO: it may be a little bit late here, to update navigator counters
|
||||||
|
|
||||||
// iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts)
|
// iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts)
|
||||||
for (String s : this.navigatorPlugins.keySet()) {
|
for (final String s : this.navigatorPlugins.keySet()) {
|
||||||
Navigator navi = this.navigatorPlugins.get(s);
|
final Navigator navi = this.navigatorPlugins.get(s);
|
||||||
if (navi != null) {
|
if (navi != null) {
|
||||||
navi.incDoc(page);
|
navi.incDoc(page);
|
||||||
}
|
}
|
||||||
|
@ -1597,7 +1584,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
if(this.dateNavigator != null) {
|
if(this.dateNavigator != null) {
|
||||||
Date[] dates = page.datesInContent();
|
final Date[] dates = page.datesInContent();
|
||||||
if (dates != null) {
|
if (dates != null) {
|
||||||
for (final Date date : dates) {
|
for (final Date date : dates) {
|
||||||
if (date != null) {
|
if (date != null) {
|
||||||
|
@ -1609,8 +1596,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// handle the vocabulary navigator
|
// handle the vocabulary navigator
|
||||||
if (this.vocabularyNavigator != null) {
|
if (this.vocabularyNavigator != null) {
|
||||||
Set<String> genericFacets = new LinkedHashSet<>();
|
final Set<String> genericFacets = new LinkedHashSet<>();
|
||||||
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
||||||
genericFacets.add(v.getName());
|
genericFacets.add(v.getName());
|
||||||
}
|
}
|
||||||
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
||||||
|
@ -1674,8 +1661,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
|
|
||||||
/* Iterate over active navigator plugins to let them update the counters */
|
/* Iterate over active navigator plugins to let them update the counters */
|
||||||
for (String s : this.navigatorPlugins.keySet()) {
|
for (final String s : this.navigatorPlugins.keySet()) {
|
||||||
Navigator navi = this.navigatorPlugins.get(s);
|
final Navigator navi = this.navigatorPlugins.get(s);
|
||||||
if (navi != null) {
|
if (navi != null) {
|
||||||
if (navIncrementedWithFacets) {
|
if (navIncrementedWithFacets) {
|
||||||
fcts = facets.get(navi.getIndexFieldName());
|
fcts = facets.get(navi.getIndexFieldName());
|
||||||
|
@ -1719,7 +1706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
} else {
|
} else {
|
||||||
fcts = null;
|
fcts = null;
|
||||||
}
|
}
|
||||||
Date[] dates = entry.datesInContent();
|
final Date[] dates = entry.datesInContent();
|
||||||
if (dates != null) {
|
if (dates != null) {
|
||||||
for (final Date date : dates) {
|
for (final Date date : dates) {
|
||||||
if (date != null) {
|
if (date != null) {
|
||||||
|
@ -1752,12 +1739,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
// get the vocabulary navigation
|
// get the vocabulary navigation
|
||||||
if (this.vocabularyNavigator != null) {
|
if (this.vocabularyNavigator != null) {
|
||||||
Set<String> genericFacets = new LinkedHashSet<>();
|
final Set<String> genericFacets = new LinkedHashSet<>();
|
||||||
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
|
||||||
genericFacets.add(v.getName());
|
genericFacets.add(v.getName());
|
||||||
}
|
}
|
||||||
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
genericFacets.addAll(ProbabilisticClassifier.getContextNames());
|
||||||
for (String vocName : genericFacets) {
|
for (final String vocName : genericFacets) {
|
||||||
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName
|
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName
|
||||||
+ CollectionSchema.VOCABULARY_TERMS_SUFFIX;
|
+ CollectionSchema.VOCABULARY_TERMS_SUFFIX;
|
||||||
if (navIncrementedWithFacets) {
|
if (navIncrementedWithFacets) {
|
||||||
|
@ -1765,20 +1752,20 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
} else {
|
} else {
|
||||||
fcts = null;
|
fcts = null;
|
||||||
}
|
}
|
||||||
Object docValue = entry.getFieldValue(fieldName);
|
final Object docValue = entry.getFieldValue(fieldName);
|
||||||
if (docValue instanceof String) {
|
if (docValue instanceof String) {
|
||||||
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) docValue))) {
|
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) docValue))) {
|
||||||
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
||||||
if (vocNav != null && vocNav.get((String) docValue) > 0) {
|
if (vocNav != null && vocNav.get((String) docValue) > 0) {
|
||||||
vocNav.dec((String) docValue);
|
vocNav.dec((String) docValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(docValue instanceof Collection) {
|
} else if(docValue instanceof Collection) {
|
||||||
if (!((Collection<?>) docValue).isEmpty()) {
|
if (!((Collection<?>) docValue).isEmpty()) {
|
||||||
for (Object singleDocValue : (Collection<?>) docValue) {
|
for (final Object singleDocValue : (Collection<?>) docValue) {
|
||||||
if (singleDocValue instanceof String) {
|
if (singleDocValue instanceof String) {
|
||||||
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) singleDocValue))) {
|
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) singleDocValue))) {
|
||||||
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
|
||||||
if (vocNav != null && vocNav.get((String) singleDocValue) > 0) {
|
if (vocNav != null && vocNav.get((String) singleDocValue) > 0) {
|
||||||
vocNav.dec((String) singleDocValue);
|
vocNav.dec((String) singleDocValue);
|
||||||
}
|
}
|
||||||
|
@ -1815,10 +1802,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
if (this.ref.size() <= ic) { // size matches return map directly
|
if (this.ref.size() <= ic) { // size matches return map directly
|
||||||
result = this.getTopics(/*ic, 500*/);
|
result = this.getTopics(/*ic, 500*/);
|
||||||
} else { // collect top most count topics
|
} else { // collect top most count topics
|
||||||
result = new ConcurrentScoreMap<String>();
|
result = new ConcurrentScoreMap<>();
|
||||||
Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
|
final Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
|
||||||
while (ic-- > 0 && it.hasNext()) {
|
while (ic-- > 0 && it.hasNext()) {
|
||||||
String word = it.next();
|
final String word = it.next();
|
||||||
result.set(word, this.ref.get(word));
|
result.set(word, this.ref.get(word));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1836,8 +1823,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
*/
|
*/
|
||||||
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
|
public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
|
||||||
// we take one entry from both stacks at the same time
|
// we take one entry from both stacks at the same time
|
||||||
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
|
final boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
|
||||||
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
|
final boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
|
||||||
return solrSuccess || rwiSuccess;
|
return solrSuccess || rwiSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1857,7 +1844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
success = true;
|
success = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") {
|
final Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
SearchEvent.this.oneFeederStarted();
|
SearchEvent.this.oneFeederStarted();
|
||||||
|
@ -1894,7 +1881,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
|
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
|
||||||
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
|
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
|
||||||
if (node != null) {
|
if (node != null) {
|
||||||
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
|
final LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
|
||||||
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
|
if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
|
||||||
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
|
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
|
||||||
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
|
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
|
||||||
|
@ -1908,7 +1895,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
false);
|
false);
|
||||||
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
|
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||||
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
|
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
|
||||||
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
|
final URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
|
||||||
addResult(re, localEntryElement.getWeight());
|
addResult(re, localEntryElement.getWeight());
|
||||||
success = true;
|
success = true;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1955,8 +1942,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
|
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
|
||||||
|
|
||||||
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
|
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
|
||||||
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow
|
this.resultList.put(new ReverseElement<>(resultEntry, ranking)); // remove smallest in case of overflow
|
||||||
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
|
if (this.pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
|
||||||
this.addTopics(resultEntry);
|
this.addTopics(resultEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1984,7 +1971,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// apply citation count
|
// apply citation count
|
||||||
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
|
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
|
||||||
if (this.query.getSegment().connectedCitation()) {
|
if (this.query.getSegment().connectedCitation()) {
|
||||||
int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
|
final int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
|
||||||
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
|
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
|
||||||
}
|
}
|
||||||
// prefer hit with 'prefer' pattern
|
// prefer hit with 'prefer' pattern
|
||||||
|
@ -2002,11 +1989,11 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances
|
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances
|
||||||
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
|
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
|
||||||
for (final String urlcomp : urlcompmap) {
|
for (final String urlcomp : urlcompmap) {
|
||||||
int tc = topwords.get(urlcomp);
|
final int tc = topwords.get(urlcomp);
|
||||||
if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist;
|
if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist;
|
||||||
}
|
}
|
||||||
for (final String descrcomp : descrcompmap) {
|
for (final String descrcomp : descrcompmap) {
|
||||||
int tc = topwords.get(descrcomp);
|
final int tc = topwords.get(descrcomp);
|
||||||
if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist;
|
if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2037,10 +2024,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
// load snippet
|
// load snippet
|
||||||
ContentDomain contentDomain = page.getContentDomain();
|
final ContentDomain contentDomain = page.getContentDomain();
|
||||||
if (contentDomain == Classification.ContentDomain.TEXT || contentDomain == Classification.ContentDomain.ALL) {
|
if (contentDomain == Classification.ContentDomain.TEXT || contentDomain == Classification.ContentDomain.ALL) {
|
||||||
// attach text snippet
|
// attach text snippet
|
||||||
long startTime = System.currentTimeMillis();
|
final long startTime = System.currentTimeMillis();
|
||||||
final TextSnippet snippet = new TextSnippet(
|
final TextSnippet snippet = new TextSnippet(
|
||||||
this.loader,
|
this.loader,
|
||||||
page,
|
page,
|
||||||
|
@ -2110,7 +2097,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
* For local only search, a new SearchEvent should be created, starting directly at the requested offset,
|
* For local only search, a new SearchEvent should be created, starting directly at the requested offset,
|
||||||
* thus allowing to handle last pages of large resultsets
|
* thus allowing to handle last pages of large resultsets
|
||||||
*/
|
*/
|
||||||
int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
|
final int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
|
||||||
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}}
|
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}}
|
||||||
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
|
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
|
||||||
// Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added
|
// Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added
|
||||||
|
@ -2175,43 +2162,43 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
|
|
||||||
/** Image results counter */
|
/** Image results counter */
|
||||||
private int imagePageCounter = 0;
|
private int imagePageCounter = 0;
|
||||||
private LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<String, ImageResult>();
|
private final LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<>();
|
||||||
private LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<String, ImageResult>();
|
private final LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<>();
|
||||||
private LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<String, ImageResult>();
|
private final LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<>();
|
||||||
private ImageResult nthImage(int item) {
|
private ImageResult nthImage(int item) {
|
||||||
Object o = SetTools.nth(this.imageViewed.values(), item);
|
final Object o = SetTools.nth(this.imageViewed.values(), item);
|
||||||
if (o == null) return null;
|
if (o == null) return null;
|
||||||
return (ImageResult) o;
|
return (ImageResult) o;
|
||||||
}
|
}
|
||||||
private boolean hasSpare() {
|
private boolean hasSpare() {
|
||||||
return imageSpareGood.size() > 0 || imageSpareBad.size() > 0;
|
return this.imageSpareGood.size() > 0 || this.imageSpareBad.size() > 0;
|
||||||
}
|
}
|
||||||
private boolean containsSpare(String id) {
|
private boolean containsSpare(String id) {
|
||||||
return imageSpareGood.containsKey(id) || imageSpareBad.containsKey(id);
|
return this.imageSpareGood.containsKey(id) || this.imageSpareBad.containsKey(id);
|
||||||
}
|
}
|
||||||
private int sizeSpare() {
|
private int sizeSpare() {
|
||||||
return imageSpareGood.size() + imageSpareBad.size();
|
return this.imageSpareGood.size() + this.imageSpareBad.size();
|
||||||
}
|
}
|
||||||
private ImageResult nextSpare() {
|
private ImageResult nextSpare() {
|
||||||
if (imageSpareGood.size() > 0) {
|
if (this.imageSpareGood.size() > 0) {
|
||||||
Map.Entry<String, ImageResult> next = imageSpareGood.entrySet().iterator().next();
|
final Map.Entry<String, ImageResult> next = this.imageSpareGood.entrySet().iterator().next();
|
||||||
imageViewed.put(next.getKey(), next.getValue());
|
this.imageViewed.put(next.getKey(), next.getValue());
|
||||||
imageSpareGood.remove(next.getKey());
|
this.imageSpareGood.remove(next.getKey());
|
||||||
return next.getValue();
|
return next.getValue();
|
||||||
}
|
}
|
||||||
if (imageSpareBad.size() > 0) {
|
if (this.imageSpareBad.size() > 0) {
|
||||||
Map.Entry<String, ImageResult> next = imageSpareBad.entrySet().iterator().next();
|
final Map.Entry<String, ImageResult> next = this.imageSpareBad.entrySet().iterator().next();
|
||||||
imageViewed.put(next.getKey(), next.getValue());
|
this.imageViewed.put(next.getKey(), next.getValue());
|
||||||
imageSpareBad.remove(next.getKey());
|
this.imageSpareBad.remove(next.getKey());
|
||||||
return next.getValue();
|
return next.getValue();
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException {
|
public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException {
|
||||||
if (item < imageViewed.size()) return nthImage(item);
|
if (item < this.imageViewed.size()) return nthImage(item);
|
||||||
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
|
if (this.imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
|
||||||
URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
|
final URIMetadataNode doc = oneResult(this.imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
|
||||||
// check if the match was made in the url or in the image links
|
// check if the match was made in the url or in the image links
|
||||||
if (doc == null) {
|
if (doc == null) {
|
||||||
if (hasSpare()) return nextSpare();
|
if (hasSpare()) return nextSpare();
|
||||||
|
@ -2231,45 +2218,45 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
// check image size
|
// check image size
|
||||||
final Collection<Object> height = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
|
final Collection<Object> height = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
|
||||||
final Collection<Object> width = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
|
final Collection<Object> width = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
|
||||||
int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown
|
final int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown
|
||||||
int w = width == null ? 0 : (Integer) width.iterator().next();
|
final int w = width == null ? 0 : (Integer) width.iterator().next();
|
||||||
if ((h <= 0 || h > 16) && (w <= 0 || w > 16)) { // we don't want too small images (< 16x16)
|
if ((h <= 0 || h > 16) && (w <= 0 || w > 16)) { // we don't want too small images (< 16x16)
|
||||||
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
|
if (!this.imageViewed.containsKey(id) && !containsSpare(id)) this.imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(!strictContentDom) {
|
} else if(!strictContentDom) {
|
||||||
Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
|
final Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
|
||||||
Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
|
final Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
|
||||||
if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) {
|
if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) {
|
||||||
List<Object> alt = altO == null ? null : (List<Object>) altO;
|
final List<Object> alt = altO == null ? null : (List<Object>) altO;
|
||||||
List<Object> img = (List<Object>) imgO;
|
final List<Object> img = (List<Object>) imgO;
|
||||||
List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size());
|
final List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size());
|
||||||
Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
|
final Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
|
||||||
Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
|
final Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
|
||||||
List<Object> height = heightO == null ? null : (List<Object>) heightO;
|
final List<Object> height = heightO == null ? null : (List<Object>) heightO;
|
||||||
List<Object> width = widthO == null ? null : (List<Object>) widthO;
|
final List<Object> width = widthO == null ? null : (List<Object>) widthO;
|
||||||
for (int c = 0; c < img.size(); c++) {
|
for (int c = 0; c < img.size(); c++) {
|
||||||
String image_urlstub = (String) img.get(c);
|
final String image_urlstub = (String) img.get(c);
|
||||||
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
|
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
|
||||||
* or documents coming from previous versions peers */
|
* or documents coming from previous versions peers */
|
||||||
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
|
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
|
||||||
try {
|
try {
|
||||||
int h = height == null ? 0 : (Integer) height.get(c);
|
final int h = height == null ? 0 : (Integer) height.get(c);
|
||||||
int w = width == null ? 0 : (Integer) width.get(c);
|
final int w = width == null ? 0 : (Integer) width.get(c);
|
||||||
|
|
||||||
// check size good for display (parser may init unknown dimension with -1)
|
// check size good for display (parser may init unknown dimension with -1)
|
||||||
if (h > 0 && h <= 16) continue; // too small for display
|
if (h > 0 && h <= 16) continue; // too small for display
|
||||||
if (w > 0 && w <= 16) continue; // too small for display
|
if (w > 0 && w <= 16) continue; // too small for display
|
||||||
|
|
||||||
DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub);
|
final DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub);
|
||||||
String id = ASCII.String(imageUrl.hash());
|
final String id = ASCII.String(imageUrl.hash());
|
||||||
if (!imageViewed.containsKey(id) && !containsSpare(id)) {
|
if (!this.imageViewed.containsKey(id) && !containsSpare(id)) {
|
||||||
String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : "";
|
final String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : "";
|
||||||
ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0);
|
final ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0);
|
||||||
boolean match = (query.getQueryGoal().matches(image_urlstub) || query.getQueryGoal().matches(image_alt));
|
final boolean match = (this.query.getQueryGoal().matches(image_urlstub) || this.query.getQueryGoal().matches(image_alt));
|
||||||
if (match) imageSpareGood.put(id, imageResult); else imageSpareBad.put(id, imageResult);
|
if (match) this.imageSpareGood.put(id, imageResult); else this.imageSpareBad.put(id, imageResult);
|
||||||
}
|
}
|
||||||
} catch (MalformedURLException e) {
|
} catch (final MalformedURLException e) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2303,7 +2290,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) {
|
while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) {
|
||||||
URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
|
final URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
|
||||||
if (re == null) break;
|
if (re == null) break;
|
||||||
}
|
}
|
||||||
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
|
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
|
||||||
|
@ -2331,7 +2318,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
|
||||||
* because they were not supposed to be here. If really necessary to keep them,
|
* because they were not supposed to be here. If really necessary to keep them,
|
||||||
* growing the maxSize of the resultList should be considered here.
|
* growing the maxSize of the resultList should be considered here.
|
||||||
*/
|
*/
|
||||||
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
|
final WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drain stacks in two steps (Solr, then RWI), because one stack might still
|
* Drain stacks in two steps (Solr, then RWI), because one stack might still
|
||||||
|
|
Loading…
Reference in New Issue
Block a user