removed ContentControl servlet and functionality

This was not used at all (as far as I know) and was blocking a smooth
integration of Ivy in the context of an existing JSON parser.
This commit is contained in:
Michael Peter Christen 2022-09-28 17:25:04 +02:00
parent b54f4ad35f
commit fc98ca7a9c
24 changed files with 1915 additions and 3010 deletions

View File

@ -3,119 +3,6 @@
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/> <classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/>
<classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/> <classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/> <classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/avatica-core-1.13.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-core-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-linq4j-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/jchardet-1.0.jar"/>
<classpathentry kind="lib" path="lib/common-image-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
<classpathentry kind="lib" path="lib/commons-lang3-3.12.0.jar"/>
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/httpclient-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/httpcore-4.4.13.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/icu4j-63.1.jar"/>
<classpathentry kind="lib" path="lib/imageio-bmp-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-core-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-metadata-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-tiff-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-annotations-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-core-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-databind-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/jaudiotagger-2.2.5.jar"/>
<classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/>
<classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/>
<classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.25.jar"/>
<classpathentry kind="lib" path="lib/jetty-client-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-continuation-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-deploy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-http-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-io-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-jmx-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-proxy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-security-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-server-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlet-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlets-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-webapp-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-xml-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jsch-0.1.54.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jsonic-1.3.10.jar"/>
<classpathentry kind="lib" path="lib/jwat-archive-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-gzip-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-warc-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-backward-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-classification-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-grouping-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-join-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-misc-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queries-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-spatial-extras-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-suggest-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/opentracing-api-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-noop-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-util-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/metadata-extractor-2.11.0.jar"/>
<classpathentry kind="lib" path="lib/metrics-core-3.2.2.jar"/>
<classpathentry kind="lib" path="lib/metrics-jmx-4.1.5.jar"/>
<classpathentry kind="lib" path="lib/org.restlet.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/poi-3.17.jar"/>
<classpathentry kind="lib" path="lib/poi-scratchpad-3.17.jar"/>
<classpathentry kind="lib" path="lib/rrd4j-3.2.jar"/>
<classpathentry kind="lib" path="lib/solr-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/solr-solrj-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/spatial4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/stax2-api-3.1.4.jar"/>
<classpathentry kind="lib" path="lib/weupnp-0.1.4.jar"/>
<classpathentry kind="lib" path="lib/woodstox-core-asl-4.4.1.jar"/>
<classpathentry kind="lib" path="lib/xml-apis-1.4.01.jar"/>
<classpathentry kind="lib" path="lib/xmpcore-5.1.3.jar"/>
<classpathentry kind="lib" path="lib/xz-1.8.jar"/>
<classpathentry kind="lib" path="lib/zookeeper-3.4.14.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
<classpathentry kind="lib" path="lib/hazelcast-4.2.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.21.jar"/>
<classpathentry kind="lib" path="lib/bcmail-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcpkix-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcprov-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.14.2.jar"/>
<classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-api-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="src" path="htroot/api/blacklists"/> <classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/> <classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/> <classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
@ -126,6 +13,16 @@
<classpathentry kind="src" path="htroot/yacy"/> <classpathentry kind="src" path="htroot/yacy"/>
<classpathentry kind="src" path="source"/> <classpathentry kind="src" path="source"/>
<classpathentry kind="src" path="test/java"/> <classpathentry kind="src" path="test/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=yacy&amp;ivyXmlPath=ivy.xml&amp;confs=compile">
<attributes>
<attribute name="module" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/> <classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/>
<classpathentry kind="output" path="gen"/> <classpathentry kind="output" path="gen"/>
</classpath> </classpath>

View File

@ -23,5 +23,6 @@
</buildSpec> </buildSpec>
<natures> <natures>
<nature>org.eclipse.jdt.core.javanature</nature> <nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.apache.ivyde.eclipse.ivynature</nature>
</natures> </natures>
</projectDescription> </projectDescription>

View File

@ -1370,16 +1370,6 @@ core.service.webgraph.tmp = false
parserAugmentation = false parserAugmentation = false
parserAugmentation.RDFa = false parserAugmentation.RDFa = false
# Content control settings
contentcontrol.enabled = false
contentcontrol.bookmarklist = contentcontrol
contentcontrol.mandatoryfilterlist = yacy
contentcontrol.smwimport.enabled = false
contentcontrol.smwimport.baseurl =
contentcontrol.smwimport.purgelistoninit = true
contentcontrol.smwimport.targetlist = contentcontrol
contentcontrol.smwimport.defaultcategory = yacy
# host browser settings # host browser settings
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link # Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
browser.autoload = false browser.autoload = false

View File

@ -1,95 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Content Control</title>
#%env/templates/metas.template%#
</head>
<body id="Settings">
#%env/templates/header.template%#
#%env/templates/submenuBlacklist.template%#
<h2>Content Control</h2>
<form id="contentcontrolsettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="augmentation">Peer Content Control URL Filter</legend>
<p>
With this settings you can activate or deactivate content control on this peer.
</p>
<dl>
<dt><label for="content">Use content control filtering:</label></dt>
<dd>
<input type="checkbox" name="contentcontrolenabled" id="contentcontrolenabled" #(contentcontrolenabled_checked)#:: checked="checked"#(/contentcontrolenabled_checked)# />Enabled<br/>
<p class="help">
Enables or disables content control.
</p>
</dd>
<dt><label for="content">Use this table to create filter:</label></dt>
<dd>
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
<p class="help">
Define a table. Default: contentcontrol
</p>
</dd>
<dt></dt>
<dd><input type="submit" name="contentcontrolSettings" value="Submit" class="btn btn-primary"/></dd>
</dl>
</fieldset>
</form>
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="urlproxy">Content Control SMW Import Settings</legend>
<p>
With this settings you can define the content control import settings. You can define a <a href="http://wiki.sciety.org/mediawiki/extensions/yacy-smwextension/" target="_blank">Semantic Media Wiki with the appropriate extensions.</a>
</p>
<dl>
<dt><label for="content">SMW import to content control list:</label></dt>
<dd>
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
<p class="help">
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!
</p>
</dd>
<dt><label for="content">SMW import base URL:</label></dt>
<dd>
<input type="text" name="ccsmwimporturl" value="#[ccsmwimporturl]#" size="60" /><br/><br/>
<p class="help">
Define base URL for SMW special page "Ask". Example: http://my.wiki.cc/wiki/Special:Ask
</p>
</dd>
<dt><label for="content">SMW import target table:</label></dt>
<dd>
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
<p class="help">
Define import target table. Default: contentcontrol
</p>
</dd>
<dt><label for="content">Purge content control list on initial sync:</label></dt>
<dd>
<input type="checkbox" name="ccsmwpurge" id="ccsmwpurge" #(ccsmwpurge_checked)#:: checked="checked"#(/ccsmwpurge_checked)# />Enabled<br/>
<p class="help">
Purge content control list on initial synchronisation after startup.
</p>
</dd>
<dt></dt>
<dd><input type="submit" name="contentcontrolExtraSettings" value="Submit" class="btn btn-primary"/></dd>
</dl>
</fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

View File

@ -1,68 +0,0 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public final class ContentControl_p {

    /**
     * Servlet handler for the ContentControl_p.html settings page.
     * Persists the content-control and SMW-import settings posted by the
     * two forms on that page, then fills the template with the currently
     * configured values.
     *
     * @param header the HTTP request header (unused)
     * @param post   posted form parameters; null on a plain GET request
     * @param env    the server switchboard holding the peer configuration
     * @return template properties used to render the page
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
            final serverObjects post, final serverSwitch env) {

        final serverObjects prop = new serverObjects();

        if (post != null) {

            if (post.containsKey("contentcontrolExtraSettings")) {
                env.setConfig("contentcontrol.smwimport.baseurl",
                        post.get("ccsmwimporturl"));
                // checkboxes arrive as "on" when checked; pass the boolean directly
                // instead of the redundant "? true : false" ternary
                env.setConfig("contentcontrol.smwimport.enabled",
                        "on".equals(post.get("ccsmwimport")));
                env.setConfig("contentcontrol.smwimport.purgelistoninit",
                        "on".equals(post.get("ccsmwpurge")));
                env.setConfig("contentcontrol.smwimport.targetlist",
                        post.get("ccsmwimportlist"));
            }

            if (post.containsKey("contentcontrolSettings")) {
                env.setConfig("contentcontrol.enabled",
                        "on".equals(post.get("contentcontrolenabled")));
                env.setConfig("contentcontrol.bookmarklist",
                        post.get("contentcontrolbml"));
            }
        }

        // Fill the template with the current configuration. The "1"/"0"
        // values drive the #(..._checked)# switches in the HTML template.
        prop.putHTML("ccsmwimportlist",
                env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
        prop.put("ccsmwpurge_checked", env.getConfigBool(
                "contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
        prop.putHTML("ccsmwimporturl",
                env.getConfig("contentcontrol.smwimport.baseurl", ""));
        prop.put("ccsmwimport_checked", env.getConfigBool(
                "contentcontrol.smwimport.enabled", false) ? "1" : "0");
        prop.put("contentcontrolenabled_checked",
                env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
        prop.putHTML("contentcontrolbml",
                env.getConfig("contentcontrol.bookmarklist", ""));

        // return rewrite properties
        return prop;
    }
}

View File

@ -5,6 +5,5 @@
<li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li> <li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li>
<li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li> <li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li>
<li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li> <li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li>
<li><a href="ContentControl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Content Control</a></li>
</ul> </ul>
</div> </div>

View File

@ -13,7 +13,6 @@
<dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/> <dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" /> <dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" />
<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/> <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/>
<dependency org="com.googlecode.json-simple" name="json-simple" rev="1.1.1" conf="compile->master" />
<dependency org="com.google.guava" name="guava" rev="25.1-jre" conf="compile->master"/> <dependency org="com.google.guava" name="guava" rev="25.1-jre" conf="compile->master"/>
<dependency org="com.hazelcast" name="hazelcast" rev="4.2" /> <dependency org="com.hazelcast" name="hazelcast" rev="4.2" />
<dependency org="com.ibm.icu" name="icu4j" rev="63.1"/> <dependency org="com.ibm.icu" name="icu4j" rev="63.1"/>
@ -94,7 +93,6 @@
<!-- This does not match langdetect.jar from pre-ivy --> <!-- This does not match langdetect.jar from pre-ivy -->
<dependency org="org.tukaani" name="xz" rev="1.8"/> <dependency org="org.tukaani" name="xz" rev="1.8"/>
<dependency org="oro" name="oro" rev="2.0.8"/> <dependency org="oro" name="oro" rev="2.0.8"/>
<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/>
<dependency org="junit" name="junit" rev="4.13" conf="test->default"/> <dependency org="junit" name="junit" rev="4.13" conf="test->default"/>
<dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/> <dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/>

View File

@ -953,30 +953,6 @@ Duration==Dauer
#ID==ID #ID==ID
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Inhaltskontrolle<
Peer Content Control URL Filter==Peer Inhaltskontrolle URL Filter
With this settings you can activate or deactivate content control on this peer.==Mit dieser Einstellung kann die Inhaltskontrolle auf diesem Peer an- oder abgeschalten werden.
Use content control filtering:==Verwende Inhaltskontrollfilter:
>Enabled<==>Aktiviert<
Enables or disables content control.==Schaltet Inhaltskontrolle an- oder ab.
Use this table to create filter:==Verwenden Sie diese Tabelle, um Filter zu erzeugen:
Define a table. Default:==Definieren Sie ein Tabelle. Standardeinstellung:
Content Control SMW Import Settings==Inhaltskontrolle SMW Importeinstellungen
With this settings you can define the content control import settings. You can define a==Mit diesen Einstellungen können Sie die Importeinstellungen für die Inhaltskontrolle definieren. Definieren Sie ein
Semantic Media Wiki with the appropriate extensions.==Semantisches Media Wiki mit den passenden Erweiterungen.
SMW import to content control list:==SMW Import für die Inhalts-Kontroll-Liste:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Konstante Synchronisation der Inhalts-Kontroll-Liste vom SMW (Semantisches Medienwiki) im Hintergrund. Benötigt Neustart!
SMW import base URL:==SMW Import Basis URL:
Define base URL for SMW special page "Ask". Example: ==Definiere Basis URL für SMW Spezialseite "Ask". Beispiel:
SMW import target table:==SMW Import Ziele Tabelle:
Define import target table. Default: contentcontrol==Definieren Import Ziel Tabelle. Standardeinstellung: contentcontrol
Purge content control list on initial sync:==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation:
Purge content control list on initial synchronisation after startup.==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation nach dem Start.
"Submit"=="Absenden"
#-----------------------------
#File: CookieMonitorIncoming_p.html #File: CookieMonitorIncoming_p.html
#--------------------------- #---------------------------

View File

@ -530,13 +530,6 @@ Duration==Duración
ID==ID ID==ID
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Control de contenido<
>Enabled<==>Habilitado
"Submit"=="Enviar"
#-----------------------------
#File: CookieMonitorIncoming_p.html #File: CookieMonitorIncoming_p.html
#--------------------------- #---------------------------

View File

@ -510,13 +510,6 @@ Duration==Durata
ID==ID ID==ID
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Controllo dei contenuti<
>Enabled<==>Abilitato
"Submit"=="Invia"
#-----------------------------
#File: CookieMonitorIncoming_p.html #File: CookieMonitorIncoming_p.html
#--------------------------- #---------------------------

View File

@ -714,13 +714,6 @@ Last Deploy==最後の展開
Connection Tracking==接続の追跡 Connection Tracking==接続の追跡
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==コンテントの制御<
"Submit"=="確定する"
#-----------------------------
#File: CookieMonitorIncoming_p.html #File: CookieMonitorIncoming_p.html
#--------------------------- #---------------------------
Incoming Cookies Monitor==着信したCookieのモニター Incoming Cookies Monitor==着信したCookieのモニター

View File

@ -2429,71 +2429,6 @@
</body> </body>
</file> </file>
<file original="ContentControl_p.html" source-language="en" datatype="html">
<body>
<trans-unit id="3f3b9286" xml:space="preserve" approved="no" translate="yes">
<source>Content Control&lt;</source>
</trans-unit>
<trans-unit id="d21676d1" xml:space="preserve" approved="no" translate="yes">
<source>Peer Content Control URL Filter</source>
</trans-unit>
<trans-unit id="542e1ecb" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can activate or deactivate content control on this peer.</source>
</trans-unit>
<trans-unit id="2bd01413" xml:space="preserve" approved="no" translate="yes">
<source>Use content control filtering:</source>
</trans-unit>
<trans-unit id="4e4f2379" xml:space="preserve" approved="no" translate="yes">
<source>&gt;Enabled&lt;</source>
</trans-unit>
<trans-unit id="ff54fe20" xml:space="preserve" approved="no" translate="yes">
<source>Enables or disables content control.</source>
</trans-unit>
<trans-unit id="81cdc1a8" xml:space="preserve" approved="no" translate="yes">
<source>Use this table to create filter:</source>
</trans-unit>
<trans-unit id="2a641f75" xml:space="preserve" approved="no" translate="yes">
<source>Define a table. Default:</source>
</trans-unit>
<trans-unit id="c3a262b1" xml:space="preserve" approved="no" translate="yes">
<source>Content Control SMW Import Settings</source>
</trans-unit>
<trans-unit id="fe0fc485" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can define the content control import settings. You can define a</source>
</trans-unit>
<trans-unit id="a00319d4" xml:space="preserve" approved="no" translate="yes">
<source>Semantic Media Wiki with the appropriate extensions.</source>
</trans-unit>
<trans-unit id="3f00f0c5" xml:space="preserve" approved="no" translate="yes">
<source>SMW import to content control list:</source>
</trans-unit>
<trans-unit id="446815ef" xml:space="preserve" approved="no" translate="yes">
<source>Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!</source>
</trans-unit>
<trans-unit id="d9bff282" xml:space="preserve" approved="no" translate="yes">
<source>SMW import base URL:</source>
</trans-unit>
<trans-unit id="ecfbe3e8" xml:space="preserve" approved="no" translate="yes">
<source>Define base URL for SMW special page "Ask". Example: </source>
</trans-unit>
<trans-unit id="d0d7e963" xml:space="preserve" approved="no" translate="yes">
<source>SMW import target table:</source>
</trans-unit>
<trans-unit id="84acd3e4" xml:space="preserve" approved="no" translate="yes">
<source>Define import target table. Default: contentcontrol</source>
</trans-unit>
<trans-unit id="70ed825" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial sync:</source>
</trans-unit>
<trans-unit id="642de9e8" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial synchronisation after startup.</source>
</trans-unit>
<trans-unit id="bfcc5088" xml:space="preserve" approved="no" translate="yes">
<source>"Submit"</source>
</trans-unit>
</body>
</file>
<file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html"> <file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html">
<body> <body>
<trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes"> <trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes">

View File

@ -1059,30 +1059,6 @@ Duration==Длительность
#ID==ID #ID==ID
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==Управление контентом<
Peer Content Control URL Filter==Управление контентом узла
With this settings you can activate or deactivate content control on this peer.==Эти настройки позволяют включить или отключить управление контентом для вашего узла.
Use content control filtering:==Использовать фильтр управления контентом:
>Enabled<==>Включить<
Enables or disables content control.==Включение или отключение управления контентом.
Use this table to create filter:==Использовать это поле для создания фильтра:
Define a table. Default:==Задать значение поля. По-умолчанию:
Content Control SMW Import Settings==Импорт настроек управления контентом SMW
With this settings you can define the content control import settings. You can define a==Эти настройки позволяют задать параметры импорта настроек управления контентом
Semantic Media Wiki with the appropriate extensions.==Semantic Media Wiki с соответствующими расширениями.
SMW import to content control list:== Импорт SMW в список управления контентом:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Включение или отключение постоянной фоновой синхронизации списка управления контентом из SMW (Semantic Mediawiki). Потребуется перезапуск программы!
SMW import base URL:==Ссылка на импортируемую базу SMW:
Define base URL for SMW special page "Ask". Example: ==Укажите ссылку на базу SMW на специальной странице "Ask". Например:
SMW import target table:==Поле назначения импорта SMW:
Define import target table. Default: contentcontrol==Укажите поле назначения импорта. По-умолчанию: contentcontrol
Purge content control list on initial sync:==Удалить список управления контентом в начале синхронизации:
Purge content control list on initial synchronisation after startup.==Удалить список управления контентом в начале синхронизации после запуска программы.
"Submit"=="Сохранить"
#-----------------------------
#File: CookieMonitorIncoming_p.html #File: CookieMonitorIncoming_p.html
#--------------------------- #---------------------------

View File

@ -1033,31 +1033,6 @@ For minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLe
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少 The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少
#----------------------------- #-----------------------------
#File: ContentControl_p.html
#---------------------------
Content Control<==内容控制<
Peer Content Control URL Filter==节点内容控制地址过滤器
With this settings you can activate or deactivate content control on this peer==使用此设置你可以激活或取消激活此YaCy节点上的内容控制
Use content control filtering:==使用内容控制过滤:
>Enabled<==>已启用<
Enables or disables content control==启用或禁用内容控制
Use this table to create filter:==使用此表创建过滤器:
Define a table. Default:==定义一个表格. 默认:
Content Control SMW Import Settings==内容控制SMW导入设置
With this settings you can define the content control import settings. You can define a==使用此设置,你可以定义内容控制导入设置. 你可以定义一个
Semantic Media Wiki with the appropriate extensions==语义媒体百科与适当的扩展
SMW import to content control list:==SMW导入到内容控制列表:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==启用或禁用来自SMWSemantic Mediawiki的内容控制列表的恒定后台同步。 需要重启!
SMW import base URL:==SMW导入基URL:
Define base URL for SMW special page "Ask". Example: ==为SMW特殊页面“Ask”定义基础地址.例:
SMW import target table:==SMW导入目标表:
Define import target table. Default: contentcontrol==定义导入目标表. 默认值:contentcontrol
Purge content control list on initial sync:==在初始同步时清除内容控制列表:
Purge content control list on initial synchronisation after startup.==重启后,清除初始同步的内容控制列表.
"Submit"=="提交"
Define base URL for SMW special page "Ask". Example:==为SMW特殊页面“Ask”定义基础地址.例:
#-----------------------------
#File: ContentIntegrationPHPBB3_p.html #File: ContentIntegrationPHPBB3_p.html
#--------------------------- #---------------------------
Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入 Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入

View File

@ -1,90 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard;
/**
 * Periodically scheduled task that rebuilds the shared network content
 * filter from the configured content-control bookmark table whenever the
 * SMW list synchronization thread has flagged new data.
 */
public class ContentControlFilterUpdateThread implements Runnable {

    private final Switchboard sb;

    // Re-entrancy guard so a scheduler firing again does not start a second
    // rebuild while one is running. Primitive boolean: no boxing needed.
    // NOTE(review): this is not a cross-thread synchronization primitive;
    // it assumes run() is invoked from a single scheduler thread — confirm.
    private boolean locked = false;

    private static FilterEngine networkfilter;

    /**
     * @param sb the switchboard whose tables provide the filter entries
     */
    public ContentControlFilterUpdateThread(final Switchboard sb) {
        this.sb = sb;
    }

    /**
     * Rebuilds the network filter when content control is enabled and the
     * SMW import marked the list dirty. The try/finally guarantees the lock
     * flag is cleared even if the rebuild throws (the original code left the
     * flag stuck on an exception, permanently disabling further updates).
     */
    @Override
    public final void run() {
        if (this.locked) {
            return;
        }
        this.locked = true;
        try {
            if (this.sb.getConfigBool("contentcontrol.enabled", false)
                    && SMWListSyncThread.dirty) {
                networkfilter = updateFilter();
                SMWListSyncThread.dirty = false;
            }
        } finally {
            this.locked = false;
        }
    }

    /**
     * Builds a fresh {@link FilterEngine} from every non-empty "filter"
     * column value of the configured content-control bookmark table.
     *
     * @return the newly built filter; possibly partial if reading the table
     *         failed midway
     */
    private static FilterEngine updateFilter() {
        final FilterEngine newfilter = new FilterEngine();
        final Switchboard sb = Switchboard.getSwitchboard();
        try {
            final Iterator<Tables.Row> it = sb.tables.iterator(
                    sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"));
            while (it.hasNext()) {
                final Row b = it.next();
                final String filter = b.get("filter", "");
                if (!filter.isEmpty()) {
                    newfilter.add(filter, null);
                }
            }
        } catch (final IOException e) {
            // best effort: keep whatever was collected so far, but record
            // the failure (no project logger is imported in this file)
            e.printStackTrace();
        }
        return newfilter;
    }

    /**
     * @return the most recently built network filter, or null when none has
     *         been built yet or the built filter is empty
     */
    public static FilterEngine getNetworkFilter() {
        final FilterEngine f = networkfilter;
        if (f != null && f.size() > 0) {
            return f;
        }
        return null;
    }
}

View File

@ -1,163 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Streaming importer for Semantic MediaWiki list exports in JSON format.
 * Implements json-simple's {@link ContentHandler} callback interface: every
 * JSON object encountered inside the "items" array is converted to an
 * {@link SMWListRow} and handed to a consumer through a bounded blocking
 * queue. The stream is terminated with {@link SMWListRow#POISON}.
 */
public class SMWListImporter implements Runnable, ContentHandler {

    // importer state
    private final ArrayBlockingQueue<SMWListRow> listEntries; // hand-off queue to the consuming thread
    private final Reader importFile;
    private SMWListRow row; // row currently being assembled from parser callbacks
    private final JSONParser parser;

    // parser state, reused across callbacks to avoid per-token allocations
    private final StringBuilder value; // last primitive value seen
    private final StringBuilder key;   // last object-entry key seen
    private final HashMap<String, String> obj; // accumulated entries of the current JSON object
    private boolean isElement; // true once the "items" array has been entered

    /**
     * @param importFile reader over the JSON export
     * @param queueSize  capacity of the producer/consumer queue
     */
    public SMWListImporter(final Reader importFile, final int queueSize) {
        this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
        this.importFile = importFile;
        this.row = new SMWListRow();
        this.parser = new JSONParser();
        this.value = new StringBuilder(128);
        this.key = new StringBuilder(16);
        this.obj = new HashMap<String, String>();
        this.isElement = false;
    }

    @Override
    public void startJSON() throws ParseException, IOException {
        // nothing to initialize per document
    }

    @Override
    public void endJSON() throws ParseException, IOException {
        // nothing to finalize per document
    }

    @Override
    public boolean startArray() throws ParseException, IOException {
        // rows only live inside the top-level "items" array
        final String key = this.key.toString();
        if (key.equals("items")) {
            this.isElement = true;
        }
        return true;
    }

    @Override
    public boolean endArray() throws ParseException, IOException {
        return true;
    }

    @Override
    public boolean startObject() throws ParseException, IOException {
        return true;
    }

    @Override
    public boolean endObject() throws ParseException, IOException {
        if (this.isElement) {
            // object finished: copy all accumulated entries into the row
            for (final Entry<String, String> e : this.obj.entrySet()) {
                this.row.add(e.getKey(), e.getValue());
            }
            try {
                // blocks when the consumer lags behind (bounded queue)
                this.listEntries.put(this.row);
            } catch (final InterruptedException e) {
                ConcurrentLog.logException(e);
            }
            this.obj.clear();
            this.row = new SMWListRow();
        }
        return true;
    }

    @Override
    public boolean startObjectEntry(final String key) throws ParseException, IOException {
        this.key.setLength(0);
        this.key.append(key);
        return true;
    }

    @Override
    public boolean primitive(final Object value) throws ParseException, IOException {
        // capture the value as a string; null primitives leave the buffer empty
        this.value.setLength(0);
        if (value instanceof String) {
            this.value.append((String) value);
        } else if (value instanceof Boolean) {
            this.value.append(value);
        } else if (value instanceof Number) {
            this.value.append(value);
        }
        return true;
    }

    @Override
    public boolean endObjectEntry() throws ParseException, IOException {
        // key/value pair complete: stash it for endObject()
        final String key = this.key.toString();
        final String value = this.value.toString();
        this.obj.put(key, value);
        return true;
    }

    /**
     * Parses the whole input; always terminates the queue with
     * {@link SMWListRow#POISON} so consumers can stop cleanly.
     */
    @Override
    public void run() {
        try {
            ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
            this.parser.parse(this.importFile, this, true);
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        } catch (final ParseException e) {
            ConcurrentLog.logException(e);
        } finally {
            try {
                ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
                this.listEntries.put(SMWListRow.POISON);
            } catch (final InterruptedException e) {
                ConcurrentLog.logException(e);
            }
        }
    }

    /**
     * Blocking take for consumers.
     *
     * @return the next row, {@link SMWListRow#POISON} at end of stream, or
     *         {@code null} when interrupted
     */
    public SMWListRow take() {
        try {
            return this.listEntries.take();
        } catch (final InterruptedException e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }
}

View File

@ -1,117 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
/**
 * Importer for the obsolete Semantic MediaWiki JSON list format. Unlike
 * {@link SMWListImporter} this variant parses the whole document into memory
 * (no streaming callbacks), then walks the "items" array and hands each entry
 * to a consumer through a bounded blocking queue, terminated with
 * {@link SMWListRow#POISON}.
 */
public class SMWListImporterFormatObsolete implements Runnable {

    private final ArrayBlockingQueue<SMWListRow> listEntries; // hand-off queue to the consuming thread
    private final Reader importFile;
    private final JSONParser parser;

    /**
     * @param importFile reader over the JSON export
     * @param queueSize  capacity of the producer/consumer queue
     */
    public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) {
        this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
        this.importFile = importFile;
        this.parser = new JSONParser();
    }

    /**
     * Parses the document and enqueues one row per "items" entry; always
     * terminates the queue with {@link SMWListRow#POISON}.
     */
    @Override
    public void run() {
        try {
            ConcurrentLog.info("SMWLISTSYNC", "Importer run()");
            final Object obj = this.parser.parse(this.importFile);
            final JSONObject jsonObject = (JSONObject) obj;
            final JSONArray items = (JSONArray) jsonObject.get("items");
            @SuppressWarnings("unchecked")
            final Iterator<JSONObject> iterator = items.iterator();
            while (iterator.hasNext()) {
                this.parseItem(iterator.next());
            }
        } catch (final IOException e) {
            ConcurrentLog.logException(e);
        } catch (final ParseException e) {
            ConcurrentLog.logException(e);
        } finally {
            try {
                ConcurrentLog.info("SMWLISTSYNC", "Importer inserted poison pill in queue");
                this.listEntries.put(SMWListRow.POISON);
            } catch (final InterruptedException e) {
                ConcurrentLog.logException(e);
            }
        }
    }

    /**
     * Converts one JSON object into an {@link SMWListRow} and enqueues it.
     * Array-valued entries are flattened to a comma-separated string; a
     * failing entry is logged and skipped (best effort, matching the old
     * importer behavior).
     */
    private void parseItem(final JSONObject jsonObject) {
        try {
            final SMWListRow row = new SMWListRow();
            @SuppressWarnings("unchecked")
            final Iterator<String> iterator = jsonObject.keySet().iterator();
            while (iterator.hasNext()) {
                final String entryKey = iterator.next();
                final Object value = jsonObject.get(entryKey);
                String valueKey = "";
                if (value instanceof String) {
                    valueKey = value.toString();
                } else if (value instanceof JSONArray) {
                    valueKey = jsonListAll((JSONArray) value);
                }
                row.add(entryKey, valueKey);
            }
            this.listEntries.put(row);
        } catch (final Exception e) {
            ConcurrentLog.info("SMWLISTSYNC", "import of entry failed");
        }
    }

    /**
     * Joins all array elements into a comma-separated string.
     * Uses a StringBuilder instead of repeated String concatenation.
     */
    private String jsonListAll(final JSONArray value) {
        final StringBuilder res = new StringBuilder();
        @SuppressWarnings("unchecked")
        final Iterator<Object> iterator = value.listIterator();
        while (iterator.hasNext()) {
            if (res.length() > 0) {
                res.append(',');
            }
            res.append(iterator.next().toString());
        }
        return res.toString();
    }

    /**
     * Blocking take for consumers.
     *
     * @return the next row, {@link SMWListRow#POISON} at end of stream, or
     *         {@code null} when interrupted
     */
    public SMWListRow take() {
        try {
            return this.listEntries.take();
        } catch (final InterruptedException e) {
            ConcurrentLog.logException(e);
            return null;
        }
    }
}

View File

@ -1,24 +0,0 @@
package net.yacy.contentcontrol;
import net.yacy.kelondro.blob.Tables;
/**
 * One row of a Semantic MediaWiki list import: a thin wrapper around a
 * key/value {@code Tables.Data} record. {@link #POISON} is the end-of-stream
 * sentinel placed in importer queues; {@link #EMPTY} marks a round without
 * results.
 */
public class SMWListRow {

    /** Sentinel telling queue consumers that the import has finished. */
    public static final SMWListRow POISON = new SMWListRow();

    /** Sentinel for an import round that produced no data. */
    public static final SMWListRow EMPTY = new SMWListRow();

    private final Tables.Data rowData;

    public SMWListRow() {
        this.rowData = new Tables.Data();
    }

    /** Stores one column value under the given key. */
    public void add(final String key, final String value) {
        this.rowData.put(key, value);
    }

    /** @return the backing table record */
    public Tables.Data getData() {
        return this.rowData;
    }
}

View File

@ -1,201 +0,0 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.Switchboard;
/**
 * Periodically scheduled task that synchronizes a local table with a remote
 * Semantic MediaWiki "Ask" endpoint. Each run is one step of a two-phase
 * state machine: when no job is running it counts new elements since the last
 * sync (format=ystat); while a job is running it fetches pages of up to
 * {@code limit} elements (format=json, obsolete syntax) and inserts them into
 * {@code targetList}, setting {@code dirty} so the filter-update thread
 * rebuilds. NOTE(review): timestamps and flags are plain fields; assumes runs
 * come from a single scheduler thread — TODO confirm.
 */
public class SMWListSyncThread implements Runnable {
private final Switchboard sb;
// re-entrancy guard: skip a run while a previous run is still in progress
private Boolean locked = false;
// modification-date watermark of the last completed count phase
private String lastsync = "1900-01-01T01:00:00";
// watermark used by the currently running fetch phase
private String currenttimestamp = "1900-01-01T01:00:00";
// paging offset into the remote result set
private long offset = 0;
// page size for each fetch request
private final long limit = 500;
// total number of new elements reported by the count phase
private long currentmax = 0;
// true while paging through a fetch job spanning multiple run() invocations
private boolean runningjob = false;
// name of the local table receiving imported rows
private String targetList;
// extra query parameters appended (wiki-urlified) to each request
private String parameters;
// the SMW "Ask" query selecting the elements to import
private String query;
// set after inserts so ContentControlFilterUpdateThread rebuilds its filter
public static Boolean dirty = false;
/**
 * @param sb          switchboard giving access to config and tables
 * @param targetList  local table to fill
 * @param query       SMW Ask query selecting elements
 * @param parameters  additional printout parameters for the query
 * @param purgeOnInit when true, clear the target table before the first sync
 */
public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) {
this.sb = sb;
this.targetList = targetList;
this.parameters = parameters;
this.query = query;
if (purgeOnInit) {
this.sb.tables.clear(targetList);
}
}
// Percent-encodes characters in MediaWiki's "-XX" URL style (e.g. ':' -> "-3A").
// '-' must be replaced first, otherwise the escape prefix itself would be re-escaped.
private final String wikiurlify (String s) {
String ret = s;
ret = ret.replace("-", "-2D");
ret = ret.replace("+", "-2B");
ret = ret.replace(" ", "-20");
ret = ret.replace("[", "-5B");
ret = ret.replace("]", "-5D");
ret = ret.replace(":", "-3A");
ret = ret.replace(">", "-3E");
ret = ret.replace("?", "-3F");
return ret;
}
@Override
public final void run() {
if (!this.locked) {
this.locked = true;
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
if (!this.runningjob) {
// phase 1: count all elements modified since the last sync
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
URL urlCount;
urlCount = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/offset%3D0"
+ "/limit%3D200000"
+ "/format%3Dystat");
// ystat reply format: "<count>,<timestamp>" — TODO confirm against endpoint
String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false));
String overallcount = CommonPattern.COMMA.split(reply)[0];
String lastsyncstring = CommonPattern.COMMA.split(reply)[1];
this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) {
ConcurrentLog.info("SMWLISTSYNC",
"import job counts "
+ this.currentmax
+ " new elements between "
+ this.lastsync + " and "
+ this.currenttimestamp);
// start phase 2: fetch from the old watermark, advance lastsync to the server's timestamp
this.currenttimestamp = this.lastsync;
this.runningjob = true;
this.lastsync = lastsyncstring;
this.offset = 0;
}
} else {
ConcurrentLog.warn("SMWLISTSYNC",
"No SMWimport URL defined");
}
} catch (final MalformedURLException e) {
e.printStackTrace();
} catch (final IOException e) {
e.printStackTrace();
}
} else {
// phase 2: page through the new elements and import them
ConcurrentLog.info("SMWLISTSYNC",
"importing max. " + this.limit
+ " elements at " + this.offset + " of "
+ this.currentmax + ", since "
+ this.currenttimestamp);
URL urlImport;
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
urlImport = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/syntax%3Dobsolete"
+ "/offset%3D" + this.offset
+ "/limit%3D" + this.limit
+ "/format%3Djson");
this.offset += this.limit;
if (this.offset > this.currentmax) {
// last page requested; job ends after this iteration
this.runningjob = false;
}
InputStreamReader reader = null;
try {
reader = new InputStreamReader(
urlImport.openStream(), StandardCharsets.UTF_8);
} catch (final Exception e) {
ConcurrentLog.logException(e);
this.runningjob = false;
}
if (reader != null) {
SMWListImporterFormatObsolete smwListImporter = null;
try {
smwListImporter = new SMWListImporterFormatObsolete(
reader, 200);
} catch (final Exception e) {
ConcurrentLog.logException(e);
this.runningjob = false;
}
Thread t;
SMWListRow row;
// importer runs on its own thread; we consume rows until the poison pill
t = new Thread(smwListImporter,"SMW List Importer");
t.start();
while ((row = smwListImporter.take()) != SMWListRow.POISON) {
if (row == SMWListRow.EMPTY) {
this.runningjob = false;
} else {
try {
this.sb.tables.insert(targetList, row.getData());
// signal ContentControlFilterUpdateThread to rebuild
dirty = true;
} catch (final Exception e) {
e.printStackTrace();
}
}
}
}
}
} catch (final MalformedURLException e2) {
e2.printStackTrace();
}
}
this.locked = false;
}
}
return;
}
}

View File

@ -26,31 +26,31 @@ import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import net.yacy.cora.federate.solr.instance.ServerShard; import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CommonParams;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector { public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName()); protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName());
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<Byte>(0, true); public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<>(0, true);
// pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks // pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks
// this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks // this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks
static { static {
@ -158,8 +158,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override @Override
public void deleteByIds(final Collection<String> ids) throws IOException { public void deleteByIds(final Collection<String> ids) throws IOException {
if (this.server == null) return; if (this.server == null) return;
List<String> l = new ArrayList<String>(); final List<String> l = new ArrayList<>();
for (String s: ids) l.add(s); for (final String s: ids) l.add(s);
synchronized (this.server) { synchronized (this.server) {
try { try {
this.server.deleteById(l, -1); this.server.deleteById(l, -1);
@ -247,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override @Override
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException { public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
if (this.server == null) return; if (this.server == null) return;
for (SolrInputDocument solrdoc : solrdocs) { for (final SolrInputDocument solrdoc : solrdocs) {
if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict" if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict"
} }
synchronized (this.server) { synchronized (this.server) {
@ -278,8 +278,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
this.server.add(solrdocs, -1); this.server.add(solrdocs, -1);
} catch (final Throwable ee) { } catch (final Throwable ee) {
ConcurrentLog.logException(ee); ConcurrentLog.logException(ee);
List<String> ids = new ArrayList<String>(); final List<String> ids = new ArrayList<>();
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); for (final SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
log.warn(e.getMessage() + " IDs=" + ids.toString()); log.warn(e.getMessage() + " IDs=" + ids.toString());
throw new IOException(ee); throw new IOException(ee);
} }
@ -300,11 +300,11 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException { public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected"); if (this.server == null) throw new IOException("server disconnected");
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps // during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String q = params.get(CommonParams.Q); final String q = params.get(CommonParams.Q);
String fq = params.get(CommonParams.FQ); final String fq = params.get(CommonParams.FQ);
String sort = params.get(CommonParams.SORT); final String sort = params.get(CommonParams.SORT);
String fl = params.get(CommonParams.FL); final String fl = params.get(CommonParams.FL);
String threadname = Thread.currentThread().getName(); final String threadname = Thread.currentThread().getName();
QueryResponse rsp; QueryResponse rsp;
int retry = 0; int retry = 0;
Throwable error = null; Throwable error = null;
@ -322,7 +322,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
clearCaches(); // prevent further OOM if this was caused by OOM clearCaches(); // prevent further OOM if this was caused by OOM
} }
ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq)); ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq));
try {Thread.sleep(1000);} catch (InterruptedException e) {} try {Thread.sleep(1000);} catch (final InterruptedException e) {}
} }
throw new IOException("Error executing query", error); throw new IOException("Error executing query", error);
} }
@ -342,10 +342,10 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public int getSegmentCount() { public int getSegmentCount() {
if (this.server == null) return 0; if (this.server == null) return 0;
try { try {
LukeResponse lukeResponse = getIndexBrowser(false); final LukeResponse lukeResponse = getIndexBrowser(false);
NamedList<Object> info = lukeResponse.getIndexInfo(); final NamedList<Object> info = lukeResponse.getIndexInfo();
if (info == null) return 0; if (info == null) return 0;
Integer segmentCount = (Integer) info.get("segmentCount"); final Integer segmentCount = (Integer) info.get("segmentCount");
if (segmentCount == null) return 1; if (segmentCount == null) return 1;
return segmentCount.intValue(); return segmentCount.intValue();
} catch (final Throwable e) { } catch (final Throwable e) {
@ -363,19 +363,19 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server instanceof ServerShard) { if (this.server instanceof ServerShard) {
// the server can be a single shard; we don't know here // the server can be a single shard; we don't know here
// to test that, we submit requests to bots variants // to test that, we submit requests to bots variants
if (useluke == 1) return getSizeLukeRequest(); if (this.useluke == 1) return getSizeLukeRequest();
if (useluke == -1) return getSizeQueryRequest(); if (this.useluke == -1) return getSizeQueryRequest();
long ls = getSizeLukeRequest(); final long ls = getSizeLukeRequest();
long qs = getSizeQueryRequest(); final long qs = getSizeQueryRequest();
if (ls == 0 && qs == 0) { if (ls == 0 && qs == 0) {
// we don't know if this is caused by an error or not; don't change the useluke value // we don't know if this is caused by an error or not; don't change the useluke value
return 0; return 0;
} }
if (ls == qs) { if (ls == qs) {
useluke = 1; this.useluke = 1;
return ls; return ls;
} }
useluke = -1; this.useluke = -1;
return qs; return qs;
} }
return getSizeLukeRequest(); return getSizeLukeRequest();
@ -398,9 +398,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
private long getSizeLukeRequest() { private long getSizeLukeRequest() {
if (this.server == null) return 0; if (this.server == null) return 0;
try { try {
LukeResponse lukeResponse = getIndexBrowser(false); final LukeResponse lukeResponse = getIndexBrowser(false);
if (lukeResponse == null) return 0; if (lukeResponse == null) return 0;
Integer numDocs = lukeResponse.getNumDocs(); final Integer numDocs = lukeResponse.getNumDocs();
if (numDocs == null) return 0; if (numDocs == null) return 0;
return numDocs.longValue(); return numDocs.longValue();
} catch (final Throwable e) { } catch (final Throwable e) {
@ -419,7 +419,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
LukeResponse lukeResponse = null; LukeResponse lukeResponse = null;
try { try {
lukeResponse = lukeRequest.process(this.server); lukeResponse = lukeRequest.process(this.server);
} catch (IOException e) { } catch (final IOException e) {
throw new SolrServerException(e.getMessage()); throw new SolrServerException(e.getMessage());
} }
return lukeResponse; return lukeResponse;

View File

@ -27,10 +27,6 @@ import java.util.Collection;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
@ -38,17 +34,21 @@ import org.apache.solr.core.SolrCore;
import com.google.common.io.Files; import com.google.common.io.Files;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
public class EmbeddedInstance implements SolrInstance { public class EmbeddedInstance implements SolrInstance {
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"}; private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
// additional a optional solrcore.properties (or solrcore.x86.properties for 32bit systems is copied // additional a optional solrcore.properties (or solrcore.x86.properties for 32bit systems is copied
private CoreContainer coreContainer; private CoreContainer coreContainer;
private String defaultCoreName; private final String defaultCoreName;
private SolrCore defaultCore; private final SolrCore defaultCore;
private SolrClient defaultCoreServer; private final SolrClient defaultCoreServer;
private File containerPath; private final File containerPath;
private Map<String, SolrCore> cores; private final Map<String, SolrCore> cores;
private Map<String, SolrClient> server; private final Map<String, SolrClient> server;
public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException { public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException {
super(); super();
@ -56,30 +56,30 @@ public class EmbeddedInstance implements SolrInstance {
this.containerPath = containerPath; this.containerPath = containerPath;
// ensure that default core path exists // ensure that default core path exists
File defaultCorePath = new File(containerPath, givenDefaultCoreName); final File defaultCorePath = new File(containerPath, givenDefaultCoreName);
if (!defaultCorePath.exists()) defaultCorePath.mkdirs(); if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
// migrate old conf directory // migrate old conf directory
File oldConf = new File(containerPath, "conf"); final File oldConf = new File(containerPath, "conf");
File confDir = new File(defaultCorePath, "conf"); final File confDir = new File(defaultCorePath, "conf");
if (oldConf.exists()) oldConf.renameTo(confDir); if (oldConf.exists()) oldConf.renameTo(confDir);
// migrate old data directory // migrate old data directory
File oldData = new File(containerPath, "data"); final File oldData = new File(containerPath, "data");
File dataDir = new File(defaultCorePath, "data"); final File dataDir = new File(defaultCorePath, "data");
if (oldData.exists()) oldData.renameTo(dataDir); if (oldData.exists()) oldData.renameTo(dataDir);
// create index subdirectory in data if it does not exist // create index subdirectory in data if it does not exist
File indexDir = new File(dataDir, "index"); final File indexDir = new File(dataDir, "index");
if (!indexDir.exists()) indexDir.mkdirs(); if (!indexDir.exists()) indexDir.mkdirs();
// initialize the cores' configuration // initialize the cores' configuration
for (String coreName: initializeCoreNames) { for (final String coreName: initializeCoreNames) {
initializeCoreConf(solr_config, containerPath, coreName); initializeCoreConf(solr_config, containerPath, coreName);
} }
// initialize the coreContainer // initialize the coreContainer
File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores final File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile); if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile);
@ -94,9 +94,9 @@ public class EmbeddedInstance implements SolrInstance {
this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName); this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName);
// initialize core cache // initialize core cache
this.cores = new ConcurrentHashMap<String, SolrCore>(); this.cores = new ConcurrentHashMap<>();
this.cores.put(this.defaultCoreName, this.defaultCore); this.cores.put(this.defaultCoreName, this.defaultCore);
this.server = new ConcurrentHashMap<String, SolrClient>(); this.server = new ConcurrentHashMap<>();
this.server.put(this.defaultCoreName, this.defaultCoreServer); this.server.put(this.defaultCoreName, this.defaultCoreServer);
} }
@ -113,11 +113,11 @@ public class EmbeddedInstance implements SolrInstance {
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) { private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
// ensure that default core path exists // ensure that default core path exists
File corePath = new File(containerPath, coreName); final File corePath = new File(containerPath, coreName);
if (!corePath.exists()) corePath.mkdirs(); if (!corePath.exists()) corePath.mkdirs();
// check if core.properties exists in each path (thats new in Solr 5.0) // check if core.properties exists in each path (thats new in Solr 5.0)
File core_properties = new File(corePath, "core.properties"); final File core_properties = new File(corePath, "core.properties");
if (!core_properties.exists()) { if (!core_properties.exists()) {
// create the file // create the file
try ( try (
@ -130,25 +130,25 @@ public class EmbeddedInstance implements SolrInstance {
fos.write(ASCII.getBytes("config=${solrconfig:solrconfig.xml}\n")); fos.write(ASCII.getBytes("config=${solrconfig:solrconfig.xml}\n"));
fos.write(ASCII.getBytes("schema=${schema:schema.xml}\n")); fos.write(ASCII.getBytes("schema=${schema:schema.xml}\n"));
fos.write(ASCII.getBytes("coreNodeName=${coreNodeName:}\n")); fos.write(ASCII.getBytes("coreNodeName=${coreNodeName:}\n"));
} catch (IOException e) { } catch (final IOException e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
} }
// ensure necessary subpaths exist // ensure necessary subpaths exist
File conf = new File(corePath, "conf"); final File conf = new File(corePath, "conf");
conf.mkdirs(); conf.mkdirs();
File data = new File(corePath, "data"); final File data = new File(corePath, "data");
data.mkdirs(); data.mkdirs();
// (over-)write configuration into conf path // (over-)write configuration into conf path
File source, target; File source, target;
for (String cf: confFiles) { for (final String cf: confFiles) {
source = new File(solr_config, cf); source = new File(solr_config, cf);
if (source.isDirectory()) { if (source.isDirectory()) {
target = new File(conf, cf); target = new File(conf, cf);
target.mkdirs(); target.mkdirs();
for (String cfl: source.list()) { for (final String cfl: source.list()) {
try { try {
Files.copy(new File(source, cfl), new File(target, cfl)); Files.copy(new File(source, cfl), new File(target, cfl));
} catch (final IOException e) { } catch (final IOException e) {
@ -168,7 +168,7 @@ public class EmbeddedInstance implements SolrInstance {
// copy the solrcore.properties // copy the solrcore.properties
// for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists // for 32bit systems (os.arch name not containing '64') take the solrcore.x86.properties as solrcore.properties if exists
String os = System.getProperty("os.arch"); final String os = System.getProperty("os.arch");
if (os.contains("64")) { if (os.contains("64")) {
source = new File(solr_config, "solrcore.properties"); source = new File(solr_config, "solrcore.properties");
} else { } else {
@ -242,7 +242,7 @@ public class EmbeddedInstance implements SolrInstance {
@Override @Override
public synchronized void close() { public synchronized void close() {
for (SolrCore core: cores.values()) core.close(); for (final SolrCore core: this.cores.values()) core.close();
if (this.coreContainer != null) try { if (this.coreContainer != null) try {
this.coreContainer.shutdown(); this.coreContainer.shutdown();
this.coreContainer = null; this.coreContainer = null;

View File

@ -36,7 +36,6 @@ import java.util.Set;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
@ -424,7 +423,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (dbocc != null) { if (dbocc != null) {
return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": " + dbocc.name(); return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": " + dbocc.name();
} }
String urls = url.toNormalform(false); final String urls = url.toNormalform(false);
final long oldDate = this.indexSegment.getLoadTime(url.hash()); final long oldDate = this.indexSegment.getLoadTime(url.hash());
// deny urls that exceed allowed number of occurrences // deny urls that exceed allowed number of occurrences
@ -574,26 +573,6 @@ public final class CrawlStacker implements WorkflowTask<Request>{
} }
} }
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
if (!Switchboard.getSwitchboard()
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
final FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null) {
if (!f.isListed(url, null)) {
return "the url '"
+ url
+ "' does not belong to the network mandatory filter list";
}
}
}
}
final boolean local = url.isLocal(); final boolean local = url.isLocal();
if (this.acceptLocalURLs && local) return null; if (this.acceptLocalURLs && local) return null;
if (this.acceptGlobalURLs && !local) return null; if (this.acceptGlobalURLs && !local) return null;

View File

@ -114,8 +114,6 @@ import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.Hazelcast; import com.hazelcast.core.Hazelcast;
import com.hazelcast.core.HazelcastInstance; import com.hazelcast.core.HazelcastInstance;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.contentcontrol.SMWListSyncThread;
import net.yacy.cora.date.AbstractFormatter; import net.yacy.cora.date.AbstractFormatter;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
@ -501,8 +499,6 @@ public final class Switchboard extends serverSwitch {
// load the network definition // load the network definition
try { try {
this.overwriteNetworkDefinition(this.getSysinfo()); this.overwriteNetworkDefinition(this.getSysinfo());
} catch (final FileNotFoundException e) {
ConcurrentLog.logException(e);
} catch (final IOException e) { } catch (final IOException e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
@ -1267,27 +1263,6 @@ public final class Switchboard extends serverSwitch {
Long.parseLong(this.getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")), Long.parseLong(this.getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")),
Double.parseDouble(this.getConfig(SwitchboardConstants.INDEX_DIST_LOADPREREQ, "9.0"))); Double.parseDouble(this.getConfig(SwitchboardConstants.INDEX_DIST_LOADPREREQ, "9.0")));
// content control: initialize list sync thread
this.deployThread(
"720_ccimport",
"Content Control Import",
"this is the content control import thread",
null,
InstantBusyThread.createFromRunnable(
new SMWListSyncThread(this, sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"),
"Category:Content Source", "/?Url/?Filter/?Category/?Modification date",
sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)),
3000, 3000),
2000);
this.deployThread(
"730_ccfilter",
"Content Control Filter",
"this is the content control filter update thread",
null,
InstantBusyThread.createFromRunnable(new ContentControlFilterUpdateThread(this), 3000, 3000),
2000);
// set network-specific performance attributes // set network-specific performance attributes
if ( this.firstInit ) { if ( this.firstInit ) {
this.setRemotecrawlPPM(Math.max(1, (int) this.getConfigLong("network.unit.remotecrawl.speed", 60))); this.setRemotecrawlPPM(Math.max(1, (int) this.getConfigLong("network.unit.remotecrawl.speed", 60)));
@ -2059,7 +2034,7 @@ public final class Switchboard extends serverSwitch {
if ( this.dhtDispatcher != null ) { if ( this.dhtDispatcher != null ) {
this.dhtDispatcher.close(); this.dhtDispatcher.close();
} }
// de.anomic.http.client.Client.closeAllConnections(); // de.anomic.http.client.Client.closeAllConnections();
this.wikiDB.close(); this.wikiDB.close();
this.blogDB.close(); this.blogDB.close();
this.blogCommentDB.close(); this.blogCommentDB.close();
@ -2243,8 +2218,6 @@ public final class Switchboard extends serverSwitch {
if ( gzfile.exists() ) { if ( gzfile.exists() ) {
FileUtils.deletedelete(outfile); FileUtils.deletedelete(outfile);
} }
} catch (final FileNotFoundException e ) {
ConcurrentLog.logException(e);
} catch (final IOException e ) { } catch (final IOException e ) {
/* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */ /* Catch but log any IO exception that can occur on copy, automatic closing or streams creation */
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
@ -3112,8 +3085,6 @@ public final class Switchboard extends serverSwitch {
Document[] documents = null; Document[] documents = null;
try { try {
documents = this.parseDocument(in.queueEntry); documents = this.parseDocument(in.queueEntry);
} catch (final InterruptedException e ) {
documents = null;
} catch (final Exception e ) { } catch (final Exception e ) {
documents = null; documents = null;
} }
@ -4291,7 +4262,7 @@ public final class Switchboard extends serverSwitch {
this.log.info("dhtTransferJob: too many connections in httpc pool : " this.log.info("dhtTransferJob: too many connections in httpc pool : "
+ ConnectionInfo.getCount()); + ConnectionInfo.getCount());
// close unused connections // close unused connections
// Client.cleanup(); // Client.cleanup();
} else if ( kbytesUp > 128 ) { } else if ( kbytesUp > 128 ) {
this.log.info("dhtTransferJob: too much upload(1), currently uploading: " + kbytesUp + " Kb"); this.log.info("dhtTransferJob: too much upload(1), currently uploading: " + kbytesUp + " Kb");
} else { } else {
@ -4331,7 +4302,7 @@ public final class Switchboard extends serverSwitch {
this.log.info("dhtTransferJob: too many connections in httpc pool : " this.log.info("dhtTransferJob: too many connections in httpc pool : "
+ ConnectionInfo.getCount()); + ConnectionInfo.getCount());
// close unused connections // close unused connections
// Client.cleanup(); // Client.cleanup();
} else if ( kbytesUp > 256 ) { } else if ( kbytesUp > 256 ) {
this.log.info("dhtTransferJob: too much upload(2), currently uploading: " + kbytesUp + " Kb"); this.log.info("dhtTransferJob: too much upload(2), currently uploading: " + kbytesUp + " Kb");
} else { } else {

View File

@ -51,7 +51,6 @@ import java.util.regex.Pattern;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.analysis.Classification; import net.yacy.cora.document.analysis.Classification;
import net.yacy.cora.document.analysis.Classification.ContentDomain; import net.yacy.cora.document.analysis.Classification.ContentDomain;
@ -95,7 +94,6 @@ import net.yacy.peers.RemoteSearch;
import net.yacy.peers.SeedDB; import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.EventTracker; import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
@ -186,9 +184,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** a set of words that are used to match with the snippets */ /** a set of words that are used to match with the snippets */
private final Set<String> snippetFetchWords; private final Set<String> snippetFetchWords;
private final boolean deleteIfSnippetFail; private final boolean deleteIfSnippetFail;
private long urlRetrievalAllTime; private final long urlRetrievalAllTime;
private long snippetComputationAllTime; private final long snippetComputationAllTime;
private ConcurrentHashMap<String, LinkedHashSet<String>> snippets; private final ConcurrentHashMap<String, LinkedHashSet<String>> snippets;
private final boolean remote; private final boolean remote;
/** add received results to local index (defult=true) */ /** add received results to local index (defult=true) */
@ -283,7 +281,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
return Math.max( return Math.max(
this.local_rwi_available.get() + this.remote_rwi_available.get() + this.local_rwi_available.get() + this.remote_rwi_available.get() +
this.remote_solr_available.get() + Math.max(0, this.local_solr_stored.get() - this.local_solr_evicted.get()), this.remote_solr_available.get() + Math.max(0, this.local_solr_stored.get() - this.local_solr_evicted.get()),
imageViewed.size() + sizeSpare() this.imageViewed.size() + sizeSpare()
); );
} }
@ -324,17 +322,17 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long ab = MemoryControl.available(); long ab = MemoryControl.available();
if (ab < 1024 * 1024 * 200) { if (ab < 1024 * 1024 * 200) {
int eb = SearchEventCache.size(); final int eb = SearchEventCache.size();
SearchEventCache.cleanupEvents(false); SearchEventCache.cleanupEvents(false);
int en = SearchEventCache.size(); final int en = SearchEventCache.size();
if (en < eb) { if (en < eb) {
log.info("Cleaned up search event cache (1) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed"); log.info("Cleaned up search event cache (1) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
} }
} }
ab = MemoryControl.available(); ab = MemoryControl.available();
int eb = SearchEventCache.size(); final int eb = SearchEventCache.size();
SearchEventCache.cleanupEvents(Math.max(1, (int) (MemoryControl.available() / (1024 * 1024 * 120)))); SearchEventCache.cleanupEvents(Math.max(1, (int) (MemoryControl.available() / (1024 * 1024 * 120))));
int en = SearchEventCache.size(); final int en = SearchEventCache.size();
if (en < eb) { if (en < eb) {
log.info("Cleaned up search event cache (2) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed"); log.info("Cleaned up search event cache (2) " + eb + "->" + en + ", " + (ab - MemoryControl.available()) / 1024 / 1024 + " MB freed");
} }
@ -348,7 +346,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.imagePageCounter = query.offset; this.imagePageCounter = query.offset;
} }
this.loader = loader; this.loader = loader;
this.nodeStack = new WeakPriorityBlockingQueue<URIMetadataNode>(max_results_node, false); this.nodeStack = new WeakPriorityBlockingQueue<>(max_results_node, false);
this.maxExpectedRemoteReferences = new AtomicInteger(0); this.maxExpectedRemoteReferences = new AtomicInteger(0);
this.expectedRemoteReferences = new AtomicInteger(0); this.expectedRemoteReferences = new AtomicInteger(0);
this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true); this.excludeintext_image = Switchboard.getSwitchboard().getConfigBool("search.excludeintext.image", true);
@ -377,7 +375,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap<>(this) : null; this.protocolNavigator = protocolNavEnabled ? new ConcurrentScoreMap<>(this) : null;
this.dateNavigator = dateNavEnabled ? new ConcurrentScoreMap<>(this) : null; this.dateNavigator = dateNavEnabled ? new ConcurrentScoreMap<>(this) : null;
this.topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0; this.topicNavigatorCount = topicsNavEnabled ? MAX_TOPWORDS : 0;
this.vocabularyNavigator = new TreeMap<String, ScoreMap<String>>(); this.vocabularyNavigator = new TreeMap<>();
// prepare configured search navigation (plugins) // prepare configured search navigation (plugins)
this.navigatorPlugins = NavigatorPlugins.initFromCfgStrings(navConfigs); this.navigatorPlugins = NavigatorPlugins.initFromCfgStrings(navConfigs);
if(this.navigatorPlugins != null) { if(this.navigatorPlugins != null) {
@ -386,14 +384,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
} }
this.snippets = new ConcurrentHashMap<String, LinkedHashSet<String>>(); this.snippets = new ConcurrentHashMap<>();
this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches this.secondarySearchSuperviser = (this.query.getQueryGoal().getIncludeHashes().size() > 1) ? new SecondarySearchSuperviser(this) : null; // generate abstracts only for combined searches
if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start(); if (this.secondarySearchSuperviser != null) this.secondarySearchSuperviser.start();
this.secondarySearchThreads = null; this.secondarySearchThreads = null;
this.preselectedPeerHashes = preselectedPeerHashes; this.preselectedPeerHashes = preselectedPeerHashes;
this.IAResults = new TreeMap<byte[], String>(Base64Order.enhancedCoder); this.IAResults = new TreeMap<>(Base64Order.enhancedCoder);
this.IACount = new TreeMap<byte[], Integer>(Base64Order.enhancedCoder); this.IACount = new TreeMap<>(Base64Order.enhancedCoder);
this.heuristics = new TreeMap<byte[], HeuristicResult>(Base64Order.enhancedCoder); this.heuristics = new TreeMap<>(Base64Order.enhancedCoder);
this.IAmaxcounthash = null; this.IAmaxcounthash = null;
this.IAneardhthash = null; this.IAneardhthash = null;
this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false))); this.remote = (peers != null && peers.sizeConnected() > 0) && (this.query.domType == QueryParams.Searchdom.CLUSTER || (this.query.domType == QueryParams.Searchdom.GLOBAL && Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW_SEARCH, false)));
@ -420,10 +418,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking // sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null; this.localSearchInclusion = null;
this.ref = new ConcurrentScoreMap<String>(this); this.ref = new ConcurrentScoreMap<>(this);
this.maxtime = query.maxtime; this.maxtime = query.maxtime;
this.rwiStack = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false); this.rwiStack = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
this.doubleDomCache = new ConcurrentHashMap<String, WeakPriorityBlockingQueue<WordReferenceVars>>(); this.doubleDomCache = new ConcurrentHashMap<>();
this.flagcount = new int[32]; this.flagcount = new int[32];
for ( int i = 0; i < 32; i++ ) { for ( int i = 0; i < 32; i++ ) {
this.flagcount[i] = 0; this.flagcount[i] = 0;
@ -435,8 +433,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.receivedRemoteReferences = new AtomicInteger(0); this.receivedRemoteReferences = new AtomicInteger(0);
this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang); this.order = new ReferenceOrder(this.query.ranking, this.query.targetlang);
this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100); this.urlhashes = new RowHandleSet(Word.commonHashLength, Word.commonHashOrder, 100);
this.taggingPredicates = new HashMap<String, String>(); this.taggingPredicates = new HashMap<>();
for (Tagging t: LibraryProvider.autotagging.getVocabularies()) { for (final Tagging t: LibraryProvider.autotagging.getVocabularies()) {
this.taggingPredicates.put(t.getName(), t.getPredicate()); this.taggingPredicates.put(t.getName(), t.getPredicate());
} }
@ -453,8 +451,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.rwiProcess = null; this.rwiProcess = null;
if (query.getSegment().connectedRWI() && !Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_DHT_OFF, false)) { if (query.getSegment().connectedRWI() && !Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_DHT_OFF, false)) {
// we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old // we start the local search only if this peer is doing a remote search or when it is doing a local search and the peer is old
rwiProcess = new RWIProcess(this.localsolrsearch); this.rwiProcess = new RWIProcess(this.localsolrsearch);
rwiProcess.start(); this.rwiProcess.start();
} }
if (this.remote) { if (this.remote) {
@ -465,8 +463,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.primarySearchThreadsL = null; this.primarySearchThreadsL = null;
this.nodeSearchThreads = null; this.nodeSearchThreads = null;
} else { } else {
this.primarySearchThreadsL = new ArrayList<RemoteSearch>(); this.primarySearchThreadsL = new ArrayList<>();
this.nodeSearchThreads = new ArrayList<Thread>(); this.nodeSearchThreads = new ArrayList<>();
// start this concurrently because the remote search needs an enumeration // start this concurrently because the remote search needs an enumeration
// of the remote peers which may block in some cases when i.e. DHT is active // of the remote peers which may block in some cases when i.e. DHT is active
// at the same time. // at the same time.
@ -502,7 +500,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if ( generateAbstracts ) { if ( generateAbstracts ) {
// we need the results now // we need the results now
try { try {
if (rwiProcess != null && query.getSegment().connectedRWI()) rwiProcess.join(); if (this.rwiProcess != null && query.getSegment().connectedRWI()) this.rwiProcess.join();
} catch (final Throwable e ) { } catch (final Throwable e ) {
} }
// compute index abstracts // compute index abstracts
@ -535,7 +533,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// give process time to accumulate a certain amount of data // give process time to accumulate a certain amount of data
// before a reading process wants to get results from it // before a reading process wants to get results from it
try { try {
if (rwiProcess != null && query.getSegment().connectedRWI() && rwiProcess.isAlive()) rwiProcess.join(100); if (this.rwiProcess != null && query.getSegment().connectedRWI() && this.rwiProcess.isAlive()) this.rwiProcess.join(100);
} catch (final Throwable e ) { } catch (final Throwable e ) {
} }
// this will reduce the maximum waiting time until results are available to 100 milliseconds // this will reduce the maximum waiting time until results are available to 100 milliseconds
@ -547,14 +545,14 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.deleteIfSnippetFail = deleteIfSnippetFail; this.deleteIfSnippetFail = deleteIfSnippetFail;
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.resultList = new WeakPriorityBlockingQueue<URIMetadataNode>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking this.resultList = new WeakPriorityBlockingQueue<>(Math.max(max_results_node, 10 * query.itemsPerPage()), true); // this is the result, enriched with snippets, ranked and ordered by ranking
// snippets do not need to match with the complete query hashes, // snippets do not need to match with the complete query hashes,
// only with the query minus the stopwords which had not been used for the search // only with the query minus the stopwords which had not been used for the search
boolean filtered = false; boolean filtered = false;
// check if query contains stopword // check if query contains stopword
if (Switchboard.stopwordHashes != null) { if (Switchboard.stopwordHashes != null) {
Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator(); final Iterator<byte[]> it = query.getQueryGoal().getIncludeHashes().iterator();
while (it.hasNext()) { while (it.hasNext()) {
if (Switchboard.stopwordHashes.contains((it.next()))) { if (Switchboard.stopwordHashes.contains((it.next()))) {
filtered = true; filtered = true;
@ -600,7 +598,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
@Override @Override
public void run() { public void run() {
if (query.getSegment().termIndex() == null) return; // nothing to do; this index is not used if (SearchEvent.this.query.getSegment().termIndex() == null) return; // nothing to do; this index is not used
// do a search // do a search
oneFeederStarted(); oneFeederStarted();
@ -634,7 +632,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
SearchEvent.this.query.modifier.sitehost != null && SearchEvent.this.query.modifier.sitehost.length() > 0 SearchEvent.this.query.modifier.sitehost != null && SearchEvent.this.query.modifier.sitehost.length() > 0
) { ) {
// try again with sitehost // try again with sitehost
String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost); final String newGoal = Domains.getSmartSLD(SearchEvent.this.query.modifier.sitehost);
search = search =
SearchEvent.this.query SearchEvent.this.query
.getSegment() .getSegment()
@ -695,7 +693,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// normalize entries // normalize entries
final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local); final BlockingQueue<WordReferenceVars> decodedEntries = this.order.normalizeWith(index, maxtime, local);
int is = index.size(); final int is = index.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch( EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
this.query.id(true), this.query.id(true),
SearchEventType.NORMALIZING, SearchEventType.NORMALIZING,
@ -708,7 +706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
timer = System.currentTimeMillis(); timer = System.currentTimeMillis();
// apply all constraints // apply all constraints
long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime; final long timeout = maxtime == Long.MAX_VALUE ? Long.MAX_VALUE : System.currentTimeMillis() + maxtime;
int successcounter = 0; int successcounter = 0;
try { try {
WordReferenceVars iEntry; WordReferenceVars iEntry;
@ -716,7 +714,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
String acceptableAlternativeSitehash = null; String acceptableAlternativeSitehash = null;
if (this.query.modifier.sitehost != null && this.query.modifier.sitehost.length() > 0) try { if (this.query.modifier.sitehost != null && this.query.modifier.sitehost.length() > 0) try {
acceptableAlternativeSitehash = DigestURL.hosthash(this.query.modifier.sitehost.startsWith("www.") ? this.query.modifier.sitehost.substring(4) : "www." + this.query.modifier.sitehost, 80); acceptableAlternativeSitehash = DigestURL.hosthash(this.query.modifier.sitehost.startsWith("www.") ? this.query.modifier.sitehost.substring(4) : "www." + this.query.modifier.sitehost, 80);
} catch (MalformedURLException e1) {} } catch (final MalformedURLException e1) {}
pollloop: while ( true ) { pollloop: while ( true ) {
remaining = timeout - System.currentTimeMillis(); remaining = timeout - System.currentTimeMillis();
if (remaining <= 0) { if (remaining <= 0) {
@ -740,7 +738,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// increase flag counts // increase flag counts
Bitfield flags = iEntry.flags(); final Bitfield flags = iEntry.flags();
for (int j = 0; j < 32; j++) { for (int j = 0; j < 32; j++) {
if (flags.get(j)) this.flagcount[j]++; if (flags.get(j)) this.flagcount[j]++;
} }
@ -806,7 +804,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
this.urlhashes.putUnique(iEntry.urlhash()); this.urlhashes.putUnique(iEntry.urlhash());
rankingtryloop: while (true) { rankingtryloop: while (true) {
try { try {
this.rwiStack.put(new ReverseElement<WordReferenceVars>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest) this.rwiStack.put(new ReverseElement<>(iEntry, this.order.cardinal(iEntry))); // inserts the element and removes the worst (which is smallest)
break rankingtryloop; break rankingtryloop;
} catch (final ArithmeticException e ) { } catch (final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation // this may happen if the concurrent normalizer changes values during cardinal computation
@ -821,8 +819,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
if (System.currentTimeMillis() >= timeout) ConcurrentLog.warn("SearchEvent", "rwi normalization ended with timeout = " + maxtime); if (System.currentTimeMillis() >= timeout) ConcurrentLog.warn("SearchEvent", "rwi normalization ended with timeout = " + maxtime);
} catch (final InterruptedException e ) { } catch (final InterruptedException | SpaceExceededException e ) {
} catch (final SpaceExceededException e ) {
} }
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true); //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
@ -847,7 +844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// stop all threads // stop all threads
if (this.localsolrsearch != null) { if (this.localsolrsearch != null) {
if (localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();} if (this.localsolrsearch.isAlive()) synchronized (this.localsolrsearch) {this.localsolrsearch.interrupt();}
} }
if (this.nodeSearchThreads != null) { if (this.nodeSearchThreads != null) {
for (final Thread search : this.nodeSearchThreads) { for (final Thread search : this.nodeSearchThreads) {
@ -969,7 +966,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long timer = System.currentTimeMillis(); long timer = System.currentTimeMillis();
// normalize entries // normalize entries
int is = nodeList.size(); final int is = nodeList.size();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.NORMALIZING, resourceName, is, System.currentTimeMillis() - timer), false); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.NORMALIZING, resourceName, is, System.currentTimeMillis() - timer), false);
if (!local) { if (!local) {
this.receivedRemoteReferences.addAndGet(is); this.receivedRemoteReferences.addAndGet(is);
@ -985,7 +982,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply all constraints // apply all constraints
try { try {
pollloop: for (URIMetadataNode iEntry: nodeList) { pollloop: for (final URIMetadataNode iEntry: nodeList) {
// check url related eventual constraints (protocol, tld, sitehost, and filetype) // check url related eventual constraints (protocol, tld, sitehost, and filetype)
final String matchingResult = QueryParams.matchesURL(this.query.modifier, this.query.tld, iEntry.url()); final String matchingResult = QueryParams.matchesURL(this.query.modifier, this.query.tld, iEntry.url());
@ -1019,7 +1016,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// check constraints // check constraints
Bitfield flags = iEntry.flags(); final Bitfield flags = iEntry.flags();
if (!this.testFlags(flags)) { if (!this.testFlags(flags)) {
if (log.isFine()) log.fine("dropped Node: flag test"); if (log.isFine()) log.fine("dropped Node: flag test");
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators); updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
@ -1049,7 +1046,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// filter out media links in text search, if wanted // filter out media links in text search, if wanted
String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName()); final String ext = MultiProtocolURL.getFileExtension(iEntry.url().getFileName());
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) { if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
if (log.isFine()) log.fine("dropped Node: file name domain does not match"); if (log.isFine()) log.fine("dropped Node: file name domain does not match");
updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators); updateCountsOnSolrEntryToEvict(iEntry, facets, local, !incrementNavigators);
@ -1097,12 +1094,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
long score; long score;
// determine nodestack ranking (will be altered by postranking) // determine nodestack ranking (will be altered by postranking)
// so far Solr score is used (with abitrary factor to get value similar to rwi ranking values) // so far Solr score is used (with abitrary factor to get value similar to rwi ranking values)
Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result final Float scorex = (Float) iEntry.getFieldValue("score"); // this is a special field containing the ranking score of a Solr search result
if (scorex != null && scorex > 0) if (scorex != null && scorex > 0)
score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly score = (long) ((1000000.0f * scorex) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
else else
score = this.order.cardinal(iEntry); score = this.order.cardinal(iEntry);
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score)); // inserts the element and removes the worst (which is smallest) this.nodeStack.put(new ReverseElement<>(iEntry, score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop; break rankingtryloop;
} catch (final ArithmeticException e ) { } catch (final ArithmeticException e ) {
// this may happen if the concurrent normalizer changes values during cardinal computation // this may happen if the concurrent normalizer changes values during cardinal computation
@ -1131,8 +1128,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
private void incrNavigatorsFromSolrFacets(final Map<String, ReversibleScoreMap<String>> facets) { private void incrNavigatorsFromSolrFacets(final Map<String, ReversibleScoreMap<String>> facets) {
if(facets != null && !facets.isEmpty()) { if(facets != null && !facets.isEmpty()) {
/* Iterate over active navigator plugins to let them update the counters */ /* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) { for (final String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s); final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) { if (navi != null) {
navi.incFacet(facets); navi.incFacet(facets);
} }
@ -1144,8 +1141,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* is expressed as a spatial filter not producing facets counts (see QueryParams.getFacetsFilterQueries()). */ * is expressed as a spatial filter not producing facets counts (see QueryParams.getFacetsFilterQueries()). */
fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName()); fcts = facets.get(CollectionSchema.coordinate_p_0_coordinate.getSolrFieldName());
if (fcts != null) { if (fcts != null) {
for (String coordinate: fcts) { for (final String coordinate: fcts) {
int hc = fcts.get(coordinate); final int hc = fcts.get(coordinate);
if (hc == 0) continue; if (hc == 0) continue;
this.locationNavigator.inc(coordinate, hc); this.locationNavigator.inc(coordinate, hc);
} }
@ -1161,9 +1158,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName()); fcts = facets.get(CollectionSchema.url_protocol_s.getSolrFieldName());
if (fcts != null) { if (fcts != null) {
// remove all protocols that we don't know // remove all protocols that we don't know
Iterator<String> i = fcts.iterator(); final Iterator<String> i = fcts.iterator();
while (i.hasNext()) { while (i.hasNext()) {
String protocol = i.next(); final String protocol = i.next();
if (PROTOCOL_NAVIGATOR_SUPPORTED_VALUES.indexOf(protocol) < 0) { if (PROTOCOL_NAVIGATOR_SUPPORTED_VALUES.indexOf(protocol) < 0) {
i.remove(); i.remove();
} }
@ -1173,15 +1170,15 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// get the vocabulary navigation // get the vocabulary navigation
Set<String> genericFacets = new LinkedHashSet<>(); final Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName()); for (final Tagging v: LibraryProvider.autotagging.getVocabularies()) genericFacets.add(v.getName());
genericFacets.addAll(ProbabilisticClassifier.getContextNames()); genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName: genericFacets) { for (final String vocName: genericFacets) {
fcts = facets.get(CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX); fcts = facets.get(CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
if (fcts != null) { if (fcts != null) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName); ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) { if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>(); vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav); this.vocabularyNavigator.put(vocName, vocNav);
} }
vocNav.inc(fcts); vocNav.inc(fcts);
@ -1199,8 +1196,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Map<String, ReversibleScoreMap<String>> facets) { final Map<String, ReversibleScoreMap<String>> facets) {
/* Iterate over active navigator plugins to let them update the counters */ /* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) { for (final String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s); final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null && facets == null || !facets.containsKey(navi.getIndexFieldName())) { if (navi != null && facets == null || !facets.containsKey(navi.getIndexFieldName())) {
navi.incDoc(doc); navi.incDoc(doc);
} }
@ -1211,7 +1208,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (this.dateNavigator != null) { if (this.dateNavigator != null) {
if (facets == null || !facets.containsKey(CollectionSchema.dates_in_content_dts.getSolrFieldName())) { if (facets == null || !facets.containsKey(CollectionSchema.dates_in_content_dts.getSolrFieldName())) {
Date[] dates = doc.datesInContent(); final Date[] dates = doc.datesInContent();
if (dates != null) { if (dates != null) {
for (final Date date : dates) { for (final Date date : dates) {
if (date != null) { if (date != null) {
@ -1234,12 +1231,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation // get the vocabulary navigation
if(this.vocabularyNavigator != null) { if(this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>(); final Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) { for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName()); genericFacets.add(v.getName());
} }
genericFacets.addAll(ProbabilisticClassifier.getContextNames()); genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName : genericFacets) { for (final String vocName : genericFacets) {
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX; final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName + CollectionSchema.VOCABULARY_TERMS_SUFFIX;
if (facets == null || !facets.containsKey(fieldName)) { if (facets == null || !facets.containsKey(fieldName)) {
incrementVocNavigator(doc, vocName, fieldName); incrementVocNavigator(doc, vocName, fieldName);
@ -1259,7 +1256,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if(docValue instanceof String) { if(docValue instanceof String) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName); ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) { if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>(); vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav); this.vocabularyNavigator.put(vocName, vocNav);
} }
vocNav.inc((String)docValue); vocNav.inc((String)docValue);
@ -1267,7 +1264,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (!((Collection<?>) docValue).isEmpty()) { if (!((Collection<?>) docValue).isEmpty()) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName); ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav == null) { if (vocNav == null) {
vocNav = new ConcurrentScoreMap<String>(); vocNav = new ConcurrentScoreMap<>();
this.vocabularyNavigator.put(vocName, vocNav); this.vocabularyNavigator.put(vocName, vocNav);
} }
for (final Object singleDocValue : (Collection<?>) docValue) { for (final Object singleDocValue : (Collection<?>) docValue) {
@ -1306,7 +1303,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
rwi = this.rwiStack.poll(); rwi = this.rwiStack.poll();
if (rwi == null) return null; if (rwi == null) return null;
if (!skipDoubleDom) { if (!skipDoubleDom) {
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi); final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) { if (node == null) {
decrementCounts(rwi.getElement()); decrementCounts(rwi.getElement());
continue pollloop; continue pollloop;
@ -1322,9 +1319,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
m = this.doubleDomCache.get(hosthash); m = this.doubleDomCache.get(hosthash);
if (m == null) { if (m == null) {
// first appearance of dom. we create an entry to signal that one of that domain was already returned // first appearance of dom. we create an entry to signal that one of that domain was already returned
m = new WeakPriorityBlockingQueue<WordReferenceVars>(max_results_rwi, false); m = new WeakPriorityBlockingQueue<>(max_results_rwi, false);
this.doubleDomCache.put(hosthash, m); this.doubleDomCache.put(hosthash, m);
URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi); final URIMetadataNode node = this.query.getSegment().fulltext().getMetadata(rwi);
if (node == null) { if (node == null) {
decrementCounts(rwi.getElement()); decrementCounts(rwi.getElement());
continue pollloop; continue pollloop;
@ -1390,7 +1387,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
URIMetadataNode node = null; URIMetadataNode node = null;
try { try {
node = this.query.getSegment().fulltext().getMetadata(bestEntry); node = this.query.getSegment().fulltext().getMetadata(bestEntry);
} catch (Throwable e) { } catch (final Throwable e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
if (node == null) { if (node == null) {
@ -1442,7 +1439,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// check content domain // check content domain
ContentDomain contentDomain = page.getContentDomain(); final ContentDomain contentDomain = page.getContentDomain();
if (this.query.contentdom.getCode() > 0 && ( if (this.query.contentdom.getCode() > 0 && (
(this.query.contentdom == Classification.ContentDomain.IMAGE && contentDomain != Classification.ContentDomain.IMAGE) || (this.query.contentdom == Classification.ContentDomain.IMAGE && contentDomain != Classification.ContentDomain.IMAGE) ||
(this.query.contentdom == Classification.ContentDomain.AUDIO && contentDomain != Classification.ContentDomain.AUDIO) || (this.query.contentdom == Classification.ContentDomain.AUDIO && contentDomain != Classification.ContentDomain.AUDIO) ||
@ -1454,7 +1451,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// filter out media links in text search, if wanted // filter out media links in text search, if wanted
String ext = MultiProtocolURL.getFileExtension(page.url().getFileName()); final String ext = MultiProtocolURL.getFileExtension(page.url().getFileName());
if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) { if (this.query.contentdom == ContentDomain.TEXT && Classification.isImageExtension(ext) && this.excludeintext_image) {
if (log.isFine()) log.fine("dropped RWI: file name domain does not match"); if (log.isFine()) log.fine("dropped RWI: file name domain does not match");
continue; continue;
@ -1480,7 +1477,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check modifier constraint collection // check modifier constraint collection
// this is not available in pure RWI entries (but in local or via solr query received metadate/entries), // this is not available in pure RWI entries (but in local or via solr query received metadate/entries),
if (this.query.modifier.collection != null) { if (this.query.modifier.collection != null) {
Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value final Collection<Object> docCols = page.getFieldValues(CollectionSchema.collection_sxt.getSolrFieldName()); // get multivalued value
if (docCols == null) { // no collection info if (docCols == null) { // no collection info
decrementCounts(page.word()); decrementCounts(page.word());
continue; continue;
@ -1504,16 +1501,6 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
continue; continue;
} }
// content control
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false)) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null && !f.isListed(page.url(), null)) {
if (log.isFine()) log.fine("dropped RWI: url is blacklisted in contentcontrol");
decrementCounts(page.word());
continue;
}
}
final String pageurl = page.url().toNormalform(true); final String pageurl = page.url().toNormalform(true);
final String pageauthor = page.dc_creator(); final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase(); final String pagetitle = page.dc_title().toLowerCase();
@ -1551,9 +1538,9 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check geo coordinates // check geo coordinates
double lat, lon; double lat, lon;
if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) { if (this.query.radius > 0.0d && this.query.lat != 0.0d && this.query.lon != 0.0d && (lat = page.lat()) != 0.0d && (lon = page.lon()) != 0.0d) {
double latDelta = this.query.lat - lat; final double latDelta = this.query.lat - lat;
double lonDelta = this.query.lon - lon; final double lonDelta = this.query.lon - lon;
double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras final double distance = Math.sqrt(latDelta * latDelta + lonDelta * lonDelta); // pythagoras
if (distance > this.query.radius) { if (distance > this.query.radius) {
if (log.isFine()) log.fine("dropped RWI: radius constraint"); if (log.isFine()) log.fine("dropped RWI: radius constraint");
decrementCounts(page.word()); decrementCounts(page.word());
@ -1564,10 +1551,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field} // check vocabulary terms (metatags) {only available in Solr index as vocabulary_xxyyzzz_sxt field}
// TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL) // TODO: vocabulary is only valid and available in local Solr index (consider to auto-switch to Searchdom.LOCAL)
if (this.query.metatags != null && !this.query.metatags.isEmpty()) { if (this.query.metatags != null && !this.query.metatags.isEmpty()) {
tagloop: for (Tagging.Metatag tag : this.query.metatags) { tagloop: for (final Tagging.Metatag tag : this.query.metatags) {
SolrDocument sdoc = page; final SolrDocument sdoc = page;
if (sdoc != null) { if (sdoc != null) {
Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX); final Collection<Object> tagvalues = sdoc.getFieldValues(CollectionSchema.VOCABULARY_PREFIX + tag.getVocabularyName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
if (tagvalues != null && tagvalues.contains(tag.getObject())) { if (tagvalues != null && tagvalues.contains(tag.getObject())) {
continue tagloop; // metatag exists check next tag (filter may consist of several tags) continue tagloop; // metatag exists check next tag (filter may consist of several tags)
} }
@ -1582,8 +1569,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// TODO: it may be a little bit late here, to update navigator counters // TODO: it may be a little bit late here, to update navigator counters
// iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts) // iterate over active navigator plugins (the rwi metadata may contain the field the plugin counts)
for (String s : this.navigatorPlugins.keySet()) { for (final String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s); final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) { if (navi != null) {
navi.incDoc(page); navi.incDoc(page);
} }
@ -1597,7 +1584,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
if(this.dateNavigator != null) { if(this.dateNavigator != null) {
Date[] dates = page.datesInContent(); final Date[] dates = page.datesInContent();
if (dates != null) { if (dates != null) {
for (final Date date : dates) { for (final Date date : dates) {
if (date != null) { if (date != null) {
@ -1609,8 +1596,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// handle the vocabulary navigator // handle the vocabulary navigator
if (this.vocabularyNavigator != null) { if (this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>(); final Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) { for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName()); genericFacets.add(v.getName());
} }
genericFacets.addAll(ProbabilisticClassifier.getContextNames()); genericFacets.addAll(ProbabilisticClassifier.getContextNames());
@ -1674,8 +1661,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/* Iterate over active navigator plugins to let them update the counters */ /* Iterate over active navigator plugins to let them update the counters */
for (String s : this.navigatorPlugins.keySet()) { for (final String s : this.navigatorPlugins.keySet()) {
Navigator navi = this.navigatorPlugins.get(s); final Navigator navi = this.navigatorPlugins.get(s);
if (navi != null) { if (navi != null) {
if (navIncrementedWithFacets) { if (navIncrementedWithFacets) {
fcts = facets.get(navi.getIndexFieldName()); fcts = facets.get(navi.getIndexFieldName());
@ -1719,7 +1706,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else { } else {
fcts = null; fcts = null;
} }
Date[] dates = entry.datesInContent(); final Date[] dates = entry.datesInContent();
if (dates != null) { if (dates != null) {
for (final Date date : dates) { for (final Date date : dates) {
if (date != null) { if (date != null) {
@ -1752,12 +1739,12 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// get the vocabulary navigation // get the vocabulary navigation
if (this.vocabularyNavigator != null) { if (this.vocabularyNavigator != null) {
Set<String> genericFacets = new LinkedHashSet<>(); final Set<String> genericFacets = new LinkedHashSet<>();
for (Tagging v : LibraryProvider.autotagging.getVocabularies()) { for (final Tagging v : LibraryProvider.autotagging.getVocabularies()) {
genericFacets.add(v.getName()); genericFacets.add(v.getName());
} }
genericFacets.addAll(ProbabilisticClassifier.getContextNames()); genericFacets.addAll(ProbabilisticClassifier.getContextNames());
for (String vocName : genericFacets) { for (final String vocName : genericFacets) {
final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName final String fieldName = CollectionSchema.VOCABULARY_PREFIX + vocName
+ CollectionSchema.VOCABULARY_TERMS_SUFFIX; + CollectionSchema.VOCABULARY_TERMS_SUFFIX;
if (navIncrementedWithFacets) { if (navIncrementedWithFacets) {
@ -1765,20 +1752,20 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} else { } else {
fcts = null; fcts = null;
} }
Object docValue = entry.getFieldValue(fieldName); final Object docValue = entry.getFieldValue(fieldName);
if (docValue instanceof String) { if (docValue instanceof String) {
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) docValue))) { if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) docValue))) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName); final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav != null && vocNav.get((String) docValue) > 0) { if (vocNav != null && vocNav.get((String) docValue) > 0) {
vocNav.dec((String) docValue); vocNav.dec((String) docValue);
} }
} }
} else if(docValue instanceof Collection) { } else if(docValue instanceof Collection) {
if (!((Collection<?>) docValue).isEmpty()) { if (!((Collection<?>) docValue).isEmpty()) {
for (Object singleDocValue : (Collection<?>) docValue) { for (final Object singleDocValue : (Collection<?>) docValue) {
if (singleDocValue instanceof String) { if (singleDocValue instanceof String) {
if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) singleDocValue))) { if (navIncrementedEarlier || (fcts != null && fcts.containsKey((String) singleDocValue))) {
ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName); final ScoreMap<String> vocNav = this.vocabularyNavigator.get(vocName);
if (vocNav != null && vocNav.get((String) singleDocValue) > 0) { if (vocNav != null && vocNav.get((String) singleDocValue) > 0) {
vocNav.dec((String) singleDocValue); vocNav.dec((String) singleDocValue);
} }
@ -1815,10 +1802,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
if (this.ref.size() <= ic) { // size matches return map directly if (this.ref.size() <= ic) { // size matches return map directly
result = this.getTopics(/*ic, 500*/); result = this.getTopics(/*ic, 500*/);
} else { // collect top most count topics } else { // collect top most count topics
result = new ConcurrentScoreMap<String>(); result = new ConcurrentScoreMap<>();
Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false); final Iterator<String> it = this.getTopics(/*ic, 500*/).keys(false);
while (ic-- > 0 && it.hasNext()) { while (ic-- > 0 && it.hasNext()) {
String word = it.next(); final String word = it.next();
result.set(word, this.ref.get(word)); result.set(word, this.ref.get(word));
} }
} }
@ -1836,8 +1823,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
*/ */
public boolean drainStacksToResult(boolean concurrentSnippetFetch) { public boolean drainStacksToResult(boolean concurrentSnippetFetch) {
// we take one entry from both stacks at the same time // we take one entry from both stacks at the same time
boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch); final boolean solrSuccess = drainSolrStackToResult(concurrentSnippetFetch);
boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch); final boolean rwiSuccess = drainRWIStackToResult(concurrentSnippetFetch);
return solrSuccess || rwiSuccess; return solrSuccess || rwiSuccess;
} }
@ -1857,7 +1844,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
success = true; success = true;
} }
} else { } else {
Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") { final Thread t = new Thread("SearchEvent.drainStacksToResult.oneFilteredFromRWI") {
@Override @Override
public void run() { public void run() {
SearchEvent.this.oneFeederStarted(); SearchEvent.this.oneFeederStarted();
@ -1894,7 +1881,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null; final Element<URIMetadataNode> localEntryElement = this.nodeStack.sizeQueue() > 0 ? this.nodeStack.poll() : null;
final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement(); final URIMetadataNode node = localEntryElement == null ? null : localEntryElement.getElement();
if (node != null) { if (node != null) {
LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once final LinkedHashSet<String> solrsnippetlines = this.snippets.remove(ASCII.String(node.hash())); // we can remove this because it's used only once
if (solrsnippetlines != null && solrsnippetlines.size() > 0) { if (solrsnippetlines != null && solrsnippetlines.size() > 0) {
OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title()); OpensearchResponseWriter.removeSubsumedTitle(solrsnippetlines, node.dc_title());
final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, ""); final TextSnippet solrsnippet = new TextSnippet(node.url(), OpensearchResponseWriter.getLargestSnippet(solrsnippetlines), true, ResultClass.SOURCE_SOLR, "");
@ -1908,7 +1895,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
false); false);
final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal()); final String solrsnippetline = solrsnippet.descriptionline(this.getQuery().getQueryGoal());
final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal()); final String yacysnippetline = yacysnippet.descriptionline(this.getQuery().getQueryGoal());
URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet); final URIMetadataNode re = node.makeResultEntry(this.query.getSegment(), this.peers, solrsnippetline.length() > yacysnippetline.length() ? solrsnippet : yacysnippet);
addResult(re, localEntryElement.getWeight()); addResult(re, localEntryElement.getWeight());
success = true; success = true;
} else { } else {
@ -1955,8 +1942,8 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS)); // final long ranking = ((long) (score * 128.f)) + postRanking(resultEntry, this.getTopicNavigator(MAX_TOPWORDS));
resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api resultEntry.setScore(ranking); // update the score of resultEntry for access by search interface / api
this.resultList.put(new ReverseElement<URIMetadataNode>(resultEntry, ranking)); // remove smallest in case of overflow this.resultList.put(new ReverseElement<>(resultEntry, ranking)); // remove smallest in case of overflow
if (pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries. if (this.pollImmediately) this.resultList.poll(); // prevent re-ranking in case there is only a single index source which has already ranked entries.
this.addTopics(resultEntry); this.addTopics(resultEntry);
} }
@ -1984,7 +1971,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// apply citation count // apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother()); //System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
if (this.query.getSegment().connectedCitation()) { if (this.query.getSegment().connectedCitation()) {
int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash()); final int referencesCount = this.query.getSegment().urlCitation().count(rentry.hash());
r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation; r += (128 * referencesCount / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation;
} }
// prefer hit with 'prefer' pattern // prefer hit with 'prefer' pattern
@ -2002,11 +1989,11 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances // the token map is used (instead of urlcomps/descrcomps) to determine appearance in url/title and eliminate double occurances
// (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !) // (example Title="News News News News News News - today is party -- News News News News News News" to add one score instead of 12 * score !)
for (final String urlcomp : urlcompmap) { for (final String urlcomp : urlcompmap) {
int tc = topwords.get(urlcomp); final int tc = topwords.get(urlcomp);
if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist; if (tc > 0) r += tc << this.query.ranking.coeff_urlcompintoplist;
} }
for (final String descrcomp : descrcompmap) { for (final String descrcomp : descrcompmap) {
int tc = topwords.get(descrcomp); final int tc = topwords.get(descrcomp);
if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist; if (tc > 0) r += tc << this.query.ranking.coeff_descrcompintoplist;
} }
@ -2037,10 +2024,10 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
} }
// load snippet // load snippet
ContentDomain contentDomain = page.getContentDomain(); final ContentDomain contentDomain = page.getContentDomain();
if (contentDomain == Classification.ContentDomain.TEXT || contentDomain == Classification.ContentDomain.ALL) { if (contentDomain == Classification.ContentDomain.TEXT || contentDomain == Classification.ContentDomain.ALL) {
// attach text snippet // attach text snippet
long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();
final TextSnippet snippet = new TextSnippet( final TextSnippet snippet = new TextSnippet(
this.loader, this.loader,
page, page,
@ -2110,7 +2097,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* For local only search, a new SearchEvent should be created, starting directly at the requested offset, * For local only search, a new SearchEvent should be created, starting directly at the requested offset,
* thus allowing to handle last pages of large resultsets * thus allowing to handle last pages of large resultsets
*/ */
int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded. final int nextitems = item - this.localsolroffset + this.query.itemsPerPage; // example: suddenly switch to item 60, just 10 had been shown, 20 loaded.
if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}} if (this.localsolrsearch != null && this.localsolrsearch.isAlive()) {try {this.localsolrsearch.join();} catch (final InterruptedException e) {}}
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
// Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added // Do not increment again navigators from the local Solr on next local pages retrieval, as facets counts scope is on the total results and should already have been added
@ -2175,43 +2162,43 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
/** Image results counter */ /** Image results counter */
private int imagePageCounter = 0; private int imagePageCounter = 0;
private LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<String, ImageResult>(); private final LinkedHashMap<String, ImageResult> imageViewed = new LinkedHashMap<>();
private LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<String, ImageResult>(); private final LinkedHashMap<String, ImageResult> imageSpareGood = new LinkedHashMap<>();
private LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<String, ImageResult>(); private final LinkedHashMap<String, ImageResult> imageSpareBad = new LinkedHashMap<>();
private ImageResult nthImage(int item) { private ImageResult nthImage(int item) {
Object o = SetTools.nth(this.imageViewed.values(), item); final Object o = SetTools.nth(this.imageViewed.values(), item);
if (o == null) return null; if (o == null) return null;
return (ImageResult) o; return (ImageResult) o;
} }
private boolean hasSpare() { private boolean hasSpare() {
return imageSpareGood.size() > 0 || imageSpareBad.size() > 0; return this.imageSpareGood.size() > 0 || this.imageSpareBad.size() > 0;
} }
private boolean containsSpare(String id) { private boolean containsSpare(String id) {
return imageSpareGood.containsKey(id) || imageSpareBad.containsKey(id); return this.imageSpareGood.containsKey(id) || this.imageSpareBad.containsKey(id);
} }
private int sizeSpare() { private int sizeSpare() {
return imageSpareGood.size() + imageSpareBad.size(); return this.imageSpareGood.size() + this.imageSpareBad.size();
} }
private ImageResult nextSpare() { private ImageResult nextSpare() {
if (imageSpareGood.size() > 0) { if (this.imageSpareGood.size() > 0) {
Map.Entry<String, ImageResult> next = imageSpareGood.entrySet().iterator().next(); final Map.Entry<String, ImageResult> next = this.imageSpareGood.entrySet().iterator().next();
imageViewed.put(next.getKey(), next.getValue()); this.imageViewed.put(next.getKey(), next.getValue());
imageSpareGood.remove(next.getKey()); this.imageSpareGood.remove(next.getKey());
return next.getValue(); return next.getValue();
} }
if (imageSpareBad.size() > 0) { if (this.imageSpareBad.size() > 0) {
Map.Entry<String, ImageResult> next = imageSpareBad.entrySet().iterator().next(); final Map.Entry<String, ImageResult> next = this.imageSpareBad.entrySet().iterator().next();
imageViewed.put(next.getKey(), next.getValue()); this.imageViewed.put(next.getKey(), next.getValue());
imageSpareBad.remove(next.getKey()); this.imageSpareBad.remove(next.getKey());
return next.getValue(); return next.getValue();
} }
return null; return null;
} }
public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException { public ImageResult oneImageResult(final int item, final long timeout, final boolean strictContentDom) throws MalformedURLException {
if (item < imageViewed.size()) return nthImage(item); if (item < this.imageViewed.size()) return nthImage(item);
if (imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare if (this.imageSpareGood.size() > 0) return nextSpare(); // first put out all good spare, but no bad spare
URIMetadataNode doc = oneResult(imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare final URIMetadataNode doc = oneResult(this.imagePageCounter++, timeout); // we must use a different counter here because the image counter can be higher when one page filled up several spare
// check if the match was made in the url or in the image links // check if the match was made in the url or in the image links
if (doc == null) { if (doc == null) {
if (hasSpare()) return nextSpare(); if (hasSpare()) return nextSpare();
@ -2231,45 +2218,45 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
// check image size // check image size
final Collection<Object> height = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName()); final Collection<Object> height = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
final Collection<Object> width = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName()); final Collection<Object> width = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown final int h = height == null ? 0 : (Integer) height.iterator().next(); // might be -1 for unknown
int w = width == null ? 0 : (Integer) width.iterator().next(); final int w = width == null ? 0 : (Integer) width.iterator().next();
if ((h <= 0 || h > 16) && (w <= 0 || w > 16)) { // we don't want too small images (< 16x16) if ((h <= 0 || h > 16) && (w <= 0 || w > 16)) { // we don't want too small images (< 16x16)
if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0)); if (!this.imageViewed.containsKey(id) && !containsSpare(id)) this.imageSpareGood.put(id, new ImageResult(doc.url(), doc.url(), doc.mime(), doc.title(), w, h, 0));
} }
} }
} else if(!strictContentDom) { } else if(!strictContentDom) {
Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName()); final Collection<Object> altO = doc.getFieldValues(CollectionSchema.images_alt_sxt.getSolrFieldName());
Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName()); final Collection<Object> imgO = doc.getFieldValues(CollectionSchema.images_urlstub_sxt.getSolrFieldName());
if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) { if (imgO != null && imgO.size() > 0 && imgO instanceof List<?>) {
List<Object> alt = altO == null ? null : (List<Object>) altO; final List<Object> alt = altO == null ? null : (List<Object>) altO;
List<Object> img = (List<Object>) imgO; final List<Object> img = (List<Object>) imgO;
List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size()); final List<String> prt = CollectionConfiguration.indexedList2protocolList(doc.getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()), img.size());
Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName()); final Collection<Object> heightO = doc.getFieldValues(CollectionSchema.images_height_val.getSolrFieldName());
Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName()); final Collection<Object> widthO = doc.getFieldValues(CollectionSchema.images_width_val.getSolrFieldName());
List<Object> height = heightO == null ? null : (List<Object>) heightO; final List<Object> height = heightO == null ? null : (List<Object>) heightO;
List<Object> width = widthO == null ? null : (List<Object>) widthO; final List<Object> width = widthO == null ? null : (List<Object>) widthO;
for (int c = 0; c < img.size(); c++) { for (int c = 0; c < img.size(); c++) {
String image_urlstub = (String) img.get(c); final String image_urlstub = (String) img.get(c);
/* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents, /* Icons are not always .ico files and should now be indexed in icons_urlstub_sxt. But this test still makes sense for older indexed documents,
* or documents coming from previous versions peers */ * or documents coming from previous versions peers */
if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic if (image_urlstub.endsWith(".ico")) continue; // we don't want favicons, makes the result look idiotic
try { try {
int h = height == null ? 0 : (Integer) height.get(c); final int h = height == null ? 0 : (Integer) height.get(c);
int w = width == null ? 0 : (Integer) width.get(c); final int w = width == null ? 0 : (Integer) width.get(c);
// check size good for display (parser may init unknown dimension with -1) // check size good for display (parser may init unknown dimension with -1)
if (h > 0 && h <= 16) continue; // to small for display if (h > 0 && h <= 16) continue; // to small for display
if (w > 0 && w <= 16) continue; // to small for display if (w > 0 && w <= 16) continue; // to small for display
DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub); final DigestURL imageUrl = new DigestURL((prt != null && prt.size() > c ? prt.get(c) : "http") + "://" + image_urlstub);
String id = ASCII.String(imageUrl.hash()); final String id = ASCII.String(imageUrl.hash());
if (!imageViewed.containsKey(id) && !containsSpare(id)) { if (!this.imageViewed.containsKey(id) && !containsSpare(id)) {
String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : ""; final String image_alt = (alt != null && alt.size() > c) ? (String) alt.get(c) : "";
ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0); final ImageResult imageResult = new ImageResult(doc.url(), imageUrl, "", image_alt, w, h, 0);
boolean match = (query.getQueryGoal().matches(image_urlstub) || query.getQueryGoal().matches(image_alt)); final boolean match = (this.query.getQueryGoal().matches(image_urlstub) || this.query.getQueryGoal().matches(image_alt));
if (match) imageSpareGood.put(id, imageResult); else imageSpareBad.put(id, imageResult); if (match) this.imageSpareGood.put(id, imageResult); else this.imageSpareBad.put(id, imageResult);
} }
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
continue; continue;
} }
} }
@ -2303,7 +2290,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
int i = 0; int i = 0;
while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) { while (this.resultList.sizeAvailable() < this.query.neededResults() && System.currentTimeMillis() < timeout) {
URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis()); final URIMetadataNode re = oneResult(i++, timeout - System.currentTimeMillis());
if (re == null) break; if (re == null) break;
} }
return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable())); return this.resultList.list(Math.min(this.query.neededResults(), this.resultList.sizeAvailable()));
@ -2331,7 +2318,7 @@ public final class SearchEvent implements ScoreMapUpdatesListener {
* because they were not supposed to be here. If really necessary to keep them, * because they were not supposed to be here. If really necessary to keep them,
* growing the maxSize of the resultList should be considered here. * growing the maxSize of the resultList should be considered here.
*/ */
WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue(); final WeakPriorityBlockingQueue.Element<URIMetadataNode> initialLastResult = this.resultList.getLastInQueue();
/* /*
* Drain stacks in two steps (Solr, then RWI), because one stack might still * Drain stacks in two steps (Solr, then RWI), because one stack might still