removed ContentControl servlet and functionality

This was not used at all (as far as I know) and was blocking a smooth
integration of ivy in the context of an existing JSON parser.
This commit is contained in:
Michael Peter Christen 2022-09-28 17:25:04 +02:00
parent b54f4ad35f
commit fc98ca7a9c
24 changed files with 1915 additions and 3010 deletions

View File

@ -3,119 +3,6 @@
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/|solr/collection1/|api/blacklists/|proxymsg/|p2p/" kind="src" path="htroot"/>
<classpathentry excluding="bookmarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/|blacklists/" kind="src" path="htroot/api"/>
<classpathentry excluding="posts/|tags/|xbel/" kind="src" path="htroot/api/bookmarks"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attribute name="module" value="true"/>
<classpathentry kind="lib" path="lib/apache-mime4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/avatica-core-1.13.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-core-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/calcite-linq4j-1.18.0.jar"/>
<classpathentry kind="lib" path="lib/jchardet-1.0.jar"/>
<classpathentry kind="lib" path="lib/common-image-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-io-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/common-lang-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/commons-codec-1.14.jar"/>
<classpathentry kind="lib" path="lib/commons-fileupload-1.4.jar"/>
<classpathentry kind="lib" path="lib/commons-io-2.7.jar"/>
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
<classpathentry kind="lib" path="lib/commons-lang3-3.12.0.jar"/>
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
<classpathentry kind="lib" path="lib/commons-math3-3.4.1.jar"/>
<classpathentry kind="lib" path="lib/fontbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/http2-client-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-common-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/http2-http-client-transport-9.4.34.v20201102.jar"/>
<classpathentry kind="lib" path="lib/httpclient-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/httpcore-4.4.13.jar"/>
<classpathentry kind="lib" path="lib/httpmime-4.5.12.jar"/>
<classpathentry kind="lib" path="lib/icu4j-63.1.jar"/>
<classpathentry kind="lib" path="lib/imageio-bmp-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-core-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-metadata-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/imageio-tiff-3.3.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-annotations-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-core-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/jackson-databind-2.11.2.jar"/>
<classpathentry kind="lib" path="lib/oro-2.0.8.jar"/>
<classpathentry kind="lib" path="lib/jaudiotagger-2.2.5.jar"/>
<classpathentry kind="lib" path="lib/javax.servlet-api-3.1.0.jar"/>
<classpathentry kind="lib" path="lib/jcifs-1.3.17.jar"/>
<classpathentry kind="lib" path="lib/jcl-over-slf4j-1.7.25.jar"/>
<classpathentry kind="lib" path="lib/jetty-client-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-continuation-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-deploy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-http-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-io-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-jmx-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-proxy-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-security-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-server-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlet-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-servlets-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-util-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-webapp-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jetty-xml-9.4.35.v20201120.jar"/>
<classpathentry kind="lib" path="lib/jsch-0.1.54.jar"/>
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jsonic-1.3.10.jar"/>
<classpathentry kind="lib" path="lib/jwat-archive-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-common-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-gzip-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/jwat-warc-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-phonetic-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-backward-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-classification-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-codecs-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-grouping-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-join-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-misc-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queries-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-spatial-extras-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/lucene-suggest-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/opentracing-api-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-noop-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/opentracing-util-0.33.0.jar"/>
<classpathentry kind="lib" path="lib/metadata-extractor-2.11.0.jar"/>
<classpathentry kind="lib" path="lib/metrics-core-3.2.2.jar"/>
<classpathentry kind="lib" path="lib/metrics-jmx-4.1.5.jar"/>
<classpathentry kind="lib" path="lib/org.restlet.jar"/>
<classpathentry kind="lib" path="lib/pdfbox-2.0.15.jar"/>
<classpathentry kind="lib" path="lib/poi-3.17.jar"/>
<classpathentry kind="lib" path="lib/poi-scratchpad-3.17.jar"/>
<classpathentry kind="lib" path="lib/rrd4j-3.2.jar"/>
<classpathentry kind="lib" path="lib/solr-core-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/solr-solrj-8.8.1.jar"/>
<classpathentry kind="lib" path="lib/spatial4j-0.6.jar"/>
<classpathentry kind="lib" path="lib/stax2-api-3.1.4.jar"/>
<classpathentry kind="lib" path="lib/weupnp-0.1.4.jar"/>
<classpathentry kind="lib" path="lib/woodstox-core-asl-4.4.1.jar"/>
<classpathentry kind="lib" path="lib/xml-apis-1.4.01.jar"/>
<classpathentry kind="lib" path="lib/xmpcore-5.1.3.jar"/>
<classpathentry kind="lib" path="lib/xz-1.8.jar"/>
<classpathentry kind="lib" path="lib/zookeeper-3.4.14.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-core-2.2.jar"/>
<classpathentry kind="lib" path="libt/hamcrest-library-2.2.jar"/>
<classpathentry kind="lib" path="lib/commons-collections4-4.4.jar"/>
<classpathentry kind="lib" path="lib/guava-25.1-jre.jar"/>
<classpathentry kind="lib" path="lib/hazelcast-4.2.jar"/>
<classpathentry kind="lib" path="lib/commons-compress-1.21.jar"/>
<classpathentry kind="lib" path="lib/bcmail-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcpkix-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/bcprov-jdk15on-1.69.jar"/>
<classpathentry kind="lib" path="lib/jsoup-1.14.2.jar"/>
<classpathentry kind="lib" path="lib/log4j-over-slf4j-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-api-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/slf4j-jdk14-1.7.32.jar"/>
<classpathentry kind="lib" path="lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="src" path="htroot/api/blacklists"/>
<classpathentry kind="src" path="htroot/api/bookmarks/posts"/>
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
@ -126,6 +13,16 @@
<classpathentry kind="src" path="htroot/yacy"/>
<classpathentry kind="src" path="source"/>
<classpathentry kind="src" path="test/java"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER">
<attribute name="module" value="true"/>
<classpathentry kind="con" path="org.apache.ivyde.eclipse.cpcontainer.IVYDE_CONTAINER/?project=yacy&amp;ivyXmlPath=ivy.xml&amp;confs=compile">
<attribute name="module" value="true"/>
<classpathentry kind="lib" path="lib/J7Zip-modified-1.0.2.jar"/>
<classpathentry kind="output" path="gen"/>

View File

@ -23,5 +23,6 @@

View File

@ -1370,16 +1370,6 @@ core.service.webgraph.tmp = false
parserAugmentation = false
parserAugmentation.RDFa = false
# Content control settings
contentcontrol.enabled = false
contentcontrol.bookmarklist = contentcontrol
contentcontrol.mandatoryfilterlist = yacy
contentcontrol.smwimport.enabled = false
contentcontrol.smwimport.baseurl =
contentcontrol.smwimport.purgelistoninit = true
contentcontrol.smwimport.targetlist = contentcontrol
contentcontrol.smwimport.defaultcategory = yacy
# host browser settings
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
browser.autoload = false

View File

@ -1,95 +0,0 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "">
<html xmlns="">
<title>YaCy '#[clientname]#': Content Control</title>
<body id="Settings">
<h2>Content Control</h2>
<form id="contentcontrolsettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="augmentation">Peer Content Control URL Filter</legend>
With this settings you can activate or deactivate content control on this peer.
<dt><label for="content">Use content control filtering:</label></dt>
<input type="checkbox" name="contentcontrolenabled" id="contentcontrolenabled" #(contentcontrolenabled_checked)#:: checked="checked"#(/contentcontrolenabled_checked)# />Enabled<br/>
<p class="help">
Enables or disables content control.
<dt><label for="content">Use this table to create filter:</label></dt>
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
<p class="help">
Define a table. Default: contentcontrol
<dd><input type="submit" name="contentcontrolSettings" value="Submit" class="btn btn-primary"/></dd>
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="urlproxy">Content Control SMW Import Settings</legend>
With this settings you can define the content control import settings. You can define a <a href="" target="_blank">Semantic Media Wiki with the appropriate extensions.</a>
<dt><label for="content">SMW import to content control list:</label></dt>
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
<p class="help">
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!
<dt><label for="content">SMW import base URL:</label></dt>
<input type="text" name="ccsmwimporturl" value="#[ccsmwimporturl]#" size="60" /><br/><br/>
<p class="help">
Define base URL for SMW special page "Ask". Example:
<dt><label for="content">SMW import target table:</label></dt>
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
<p class="help">
Define import target table. Default: contentcontrol
<dt><label for="content">Purge content control list on initial sync:</label></dt>
<input type="checkbox" name="ccsmwpurge" id="ccsmwpurge" #(ccsmwpurge_checked)#:: checked="checked"#(/ccsmwpurge_checked)# />Enabled<br/>
<p class="help">
Purge content control list on initial synchronisation after startup.
<dd><input type="submit" name="contentcontrolExtraSettings" value="Submit" class="btn btn-primary"/></dd>

View File

@ -1,68 +0,0 @@
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public final class ContentControl_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
if (post != null) {
if (post.containsKey("contentcontrolExtraSettings")) {
"on".equals(post.get("ccsmwimport")) ? true : false);
"on".equals(post.get("ccsmwpurge")) ? true : false);
if (post.containsKey("contentcontrolSettings")) {
"on".equals(post.get("contentcontrolenabled")) ? true : false);
env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
prop.put("ccsmwpurge_checked", env.getConfigBool(
"contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
env.getConfig("contentcontrol.smwimport.baseurl", ""));
prop.put("ccsmwimport_checked", env.getConfigBool(
"contentcontrol.smwimport.enabled", false) ? "1" : "0");
env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
env.getConfig("contentcontrol.bookmarklist", ""));
// return rewrite properties
return prop;

View File

@ -5,6 +5,5 @@
<li><a href="BlacklistCleaner_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Cleaner</a></li>
<li><a href="BlacklistTest_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Blacklist Test</a></li>
<li><a href="BlacklistImpExp_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Import/Export</a></li>
<li><a href="ContentControl_p.html" class="MenuItemLink #(authorized)#lock::unlock#(/authorized)#">Content Control</a></li>

View File

@ -13,7 +13,6 @@
<dependency org="com.cybozu.labs" name="langdetect" rev="1.1-20120112" conf="compile->master"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.11.0" />
<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.11.2"/>
<dependency org="com.googlecode.json-simple" name="json-simple" rev="1.1.1" conf="compile->master" />
<dependency org="" name="guava" rev="25.1-jre" conf="compile->master"/>
<dependency org="com.hazelcast" name="hazelcast" rev="4.2" />
<dependency org="" name="icu4j" rev="63.1"/>
@ -94,7 +93,6 @@
<!-- This does not match langdetect.jar from pre-ivy -->
<dependency org="org.tukaani" name="xz" rev="1.8"/>
<dependency org="oro" name="oro" rev="2.0.8"/>
<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/>
<dependency org="junit" name="junit" rev="4.13" conf="test->default"/>
<dependency org="org.hamcrest" name="hamcrest" rev="2.2" conf="test->default"/>

View File

@ -953,30 +953,6 @@ Duration==Dauer
#File: ContentControl_p.html
Content Control<==Inhaltskontrolle<
Peer Content Control URL Filter==Peer Inhaltskontrolle URL Filter
With this settings you can activate or deactivate content control on this peer.==Mit dieser Einstellung kann die Inhaltskontrolle auf diesem Peer an- oder abgeschalten werden.
Use content control filtering:==Verwende Inhaltskontrollfilter:
Enables or disables content control.==Schaltet Inhaltskontrolle an- oder ab.
Use this table to create filter:==Verwenden Sie diese Tabelle, um Filter zu erzeugen:
Define a table. Default:==Definieren Sie ein Tabelle. Standardeinstellung:
Content Control SMW Import Settings==Inhaltskontrolle SMW Importeinstellungen
With this settings you can define the content control import settings. You can define a==Mit diesen Einstellungen können Sie die Importeinstellungen für die Inhaltskontrolle definieren. Definieren Sie ein
Semantic Media Wiki with the appropriate extensions.==Semantisches Media Wiki mit den passenden Erweiterungen.
SMW import to content control list:==SMW Import für die Inhalts-Kontroll-Liste:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Konstante Synchronisation der Inhalts-Kontroll-Liste vom SMW (Semantisches Medienwiki) im Hintergrund. Benötigt Neustart!
SMW import base URL:==SMW Import Basis URL:
Define base URL for SMW special page "Ask". Example: ==Definiere Basis URL für SMW Spezialseite "Ask". Beispiel:
SMW import target table:==SMW Import Ziele Tabelle:
Define import target table. Default: contentcontrol==Definieren Import Ziel Tabelle. Standardeinstellung: contentcontrol
Purge content control list on initial sync:==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation:
Purge content control list on initial synchronisation after startup.==Verwerfe Inhalts-Kontroll-Listen bei der ersten Synchronisation nach dem Start.
#File: CookieMonitorIncoming_p.html

View File

@ -530,13 +530,6 @@ Duration==Duración
#File: ContentControl_p.html
Content Control<==Control de contenido<
#File: CookieMonitorIncoming_p.html

View File

@ -510,13 +510,6 @@ Duration==Durata
#File: ContentControl_p.html
Content Control<==Controllo dei contenuti<
#File: CookieMonitorIncoming_p.html

View File

@ -714,13 +714,6 @@ Last Deploy==最後の展開
Connection Tracking==接続の追跡
#File: ContentControl_p.html
Content Control<==コンテントの制御<
#File: CookieMonitorIncoming_p.html
Incoming Cookies Monitor==着信したCookieのモニター

View File

@ -2429,71 +2429,6 @@
<file original="ContentControl_p.html" source-language="en" datatype="html">
<trans-unit id="3f3b9286" xml:space="preserve" approved="no" translate="yes">
<source>Content Control&lt;</source>
<trans-unit id="d21676d1" xml:space="preserve" approved="no" translate="yes">
<source>Peer Content Control URL Filter</source>
<trans-unit id="542e1ecb" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can activate or deactivate content control on this peer.</source>
<trans-unit id="2bd01413" xml:space="preserve" approved="no" translate="yes">
<source>Use content control filtering:</source>
<trans-unit id="4e4f2379" xml:space="preserve" approved="no" translate="yes">
<trans-unit id="ff54fe20" xml:space="preserve" approved="no" translate="yes">
<source>Enables or disables content control.</source>
<trans-unit id="81cdc1a8" xml:space="preserve" approved="no" translate="yes">
<source>Use this table to create filter:</source>
<trans-unit id="2a641f75" xml:space="preserve" approved="no" translate="yes">
<source>Define a table. Default:</source>
<trans-unit id="c3a262b1" xml:space="preserve" approved="no" translate="yes">
<source>Content Control SMW Import Settings</source>
<trans-unit id="fe0fc485" xml:space="preserve" approved="no" translate="yes">
<source>With this settings you can define the content control import settings. You can define a</source>
<trans-unit id="a00319d4" xml:space="preserve" approved="no" translate="yes">
<source>Semantic Media Wiki with the appropriate extensions.</source>
<trans-unit id="3f00f0c5" xml:space="preserve" approved="no" translate="yes">
<source>SMW import to content control list:</source>
<trans-unit id="446815ef" xml:space="preserve" approved="no" translate="yes">
<source>Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!</source>
<trans-unit id="d9bff282" xml:space="preserve" approved="no" translate="yes">
<source>SMW import base URL:</source>
<trans-unit id="ecfbe3e8" xml:space="preserve" approved="no" translate="yes">
<source>Define base URL for SMW special page "Ask". Example: </source>
<trans-unit id="d0d7e963" xml:space="preserve" approved="no" translate="yes">
<source>SMW import target table:</source>
<trans-unit id="84acd3e4" xml:space="preserve" approved="no" translate="yes">
<source>Define import target table. Default: contentcontrol</source>
<trans-unit id="70ed825" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial sync:</source>
<trans-unit id="642de9e8" xml:space="preserve" approved="no" translate="yes">
<source>Purge content control list on initial synchronisation after startup.</source>
<trans-unit id="bfcc5088" xml:space="preserve" approved="no" translate="yes">
<file original="ContentIntegrationPHPBB3_p.html" source-language="en" datatype="html">
<trans-unit id="c7bfa2ca" xml:space="preserve" approved="no" translate="yes">

View File

@ -1059,30 +1059,6 @@ Duration==Длительность
#File: ContentControl_p.html
Content Control<==Управление контентом<
Peer Content Control URL Filter==Управление контентом узла
With this settings you can activate or deactivate content control on this peer.==Эти настройки позволяют включить или отключить управление контентом для вашего узла.
Use content control filtering:==Использовать фильтр управления контентом:
Enables or disables content control.==Включение или отключение управления контентом.
Use this table to create filter:==Использовать это поле для создания фильтра:
Define a table. Default:==Задать значение поля. По-умолчанию:
Content Control SMW Import Settings==Импорт настроек управления контентом SMW
With this settings you can define the content control import settings. You can define a==Эти настройки позволяют задать параметры импорта настроек управления контентом
Semantic Media Wiki with the appropriate extensions.==Semantic Media Wiki с соответствующими расширениями.
SMW import to content control list:== Импорт SMW в список управления контентом:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==Включение или отключение постоянной фоновой синхронизации списка управления контентом из SMW (Semantic Mediawiki). Потребуется перезапуск программы!
SMW import base URL:==Ссылка на импортируемую базу SMW:
Define base URL for SMW special page "Ask". Example: ==Укажите ссылку на базу SMW на специальной странице "Ask". Например:
SMW import target table:==Поле назначения импорта SMW:
Define import target table. Default: contentcontrol==Укажите поле назначения импорта. По-умолчанию: contentcontrol
Purge content control list on initial sync:==Удалить список управления контентом в начале синхронизации:
Purge content control list on initial synchronisation after startup.==Удалить список управления контентом в начале синхронизации после запуска программы.
#File: CookieMonitorIncoming_p.html

View File

@ -1033,31 +1033,6 @@ For minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLe
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number==quantRate是参与签名计算的单词数量的度量。 数字越高,越少
#File: ContentControl_p.html
Content Control<==内容控制<
Peer Content Control URL Filter==节点内容控制地址过滤器
With this settings you can activate or deactivate content control on this peer==使用此设置你可以激活或取消激活此YaCy节点上的内容控制
Use content control filtering:==使用内容控制过滤:
Enables or disables content control==启用或禁用内容控制
Use this table to create filter:==使用此表创建过滤器:
Define a table. Default:==定义一个表格. 默认:
Content Control SMW Import Settings==内容控制SMW导入设置
With this settings you can define the content control import settings. You can define a==使用此设置,你可以定义内容控制导入设置. 你可以定义一个
Semantic Media Wiki with the appropriate extensions==语义媒体百科与适当的扩展
SMW import to content control list:==SMW导入到内容控制列表:
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!==启用或禁用来自SMWSemantic Mediawiki的内容控制列表的恒定后台同步。 需要重启!
SMW import base URL:==SMW导入基URL:
Define base URL for SMW special page "Ask". Example: ==为SMW特殊页面“Ask”定义基础地址.例:
SMW import target table:==SMW导入目标表:
Define import target table. Default: contentcontrol==定义导入目标表. 默认值:contentcontrol
Purge content control list on initial sync:==在初始同步时清除内容控制列表:
Purge content control list on initial synchronisation after startup.==重启后,清除初始同步的内容控制列表.
Define base URL for SMW special page "Ask". Example:==为SMW特殊页面“Ask”定义基础地址.例:
#File: ContentIntegrationPHPBB3_p.html
Content Integration: Retrieval from phpBB3 Databases==内容集成: 从phpBB3数据库中导入

View File

@ -1,90 +0,0 @@
package net.yacy.contentcontrol;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.repository.FilterEngine;
public class ContentControlFilterUpdateThread implements Runnable {
private final Switchboard sb;
private Boolean locked = false;
private static FilterEngine networkfilter;
public ContentControlFilterUpdateThread(final Switchboard sb) { = sb;
public final void run() {
if (!this.locked) {
this.locked = true;
if ("contentcontrol.enabled", false) == true) {
if (SMWListSyncThread.dirty) {
networkfilter = updateFilter();
SMWListSyncThread.dirty = false;
this.locked = false;
private static FilterEngine updateFilter () {
FilterEngine newfilter = new FilterEngine();
Switchboard sb = Switchboard.getSwitchboard();
Iterator<Tables.Row> it;
try {
it = sb.tables.iterator(sb.getConfig("contentcontrol.bookmarklist",
while (it.hasNext()) {
Row b =;
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
} catch (final IOException e) {
// TODO Auto-generated catch block
return newfilter;
public static FilterEngine getNetworkFilter() {
FilterEngine f = networkfilter;
if (f != null && f.size() > 0)
return f;
return null;

View File

@ -1,163 +0,0 @@
package net.yacy.contentcontrol;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporter implements Runnable, ContentHandler{
// Importer Variables
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private SMWListRow row;
private final JSONParser parser;
// Parser Variables
private final StringBuilder value;
private final StringBuilder key;
private final HashMap<String,String> obj;
private Boolean isElement;
public SMWListImporter(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.row = new SMWListRow();
this.parser = new JSONParser();
this.value = new StringBuilder(128);
this.key = new StringBuilder(16);
this.obj = new HashMap<String,String>();
this.isElement = false;
public void startJSON() throws ParseException, IOException {
public void endJSON() throws ParseException, IOException {
public boolean startArray() throws ParseException, IOException {
final String key = this.key.toString();
if (key.equals("items")) {
this.isElement = true;
return true;
public boolean endArray() throws ParseException, IOException {
return true;
public boolean startObject() throws ParseException, IOException {
return true;
public boolean endObject() throws ParseException, IOException {
if(this.isElement) {
for (Entry<String, String> e: this.obj.entrySet()) {
this.row.add (e.getKey(), e.getValue());
try {
} catch (final InterruptedException e) {
this.row = new SMWListRow();
return true;
public boolean startObjectEntry(String key) throws ParseException, IOException {
return true;
public boolean primitive(Object value) throws ParseException, IOException {
if(value instanceof java.lang.String) {
} else if(value instanceof java.lang.Boolean) {
} else if(value instanceof java.lang.Number) {
return true;
public boolean endObjectEntry() throws ParseException, IOException {
final String key = this.key.toString();
final String value = this.value.toString();
this.obj.put(key, value);
return true;
public void run() {
try {"SMWLISTSYNC", "Importer run()");
this.parser.parse(this.importFile, this, true);
} catch (final IOException e) {
} catch (final ParseException e) {
} finally {
try {"SMWLISTSYNC", "Importer inserted poison pill in queue");
} catch (final InterruptedException e) {
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (final InterruptedException e) {
return null;

View File

@ -1,117 +0,0 @@
package net.yacy.contentcontrol;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.util.ConcurrentLog;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporterFormatObsolete implements Runnable{
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private final JSONParser parser;
public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.parser = new JSONParser();
public void run() {
try {"SMWLISTSYNC", "Importer run()");
Object obj = this.parser.parse(this.importFile);
JSONObject jsonObject = (JSONObject) obj;
JSONArray items = (JSONArray) jsonObject.get("items");
Iterator<JSONObject> iterator = items.iterator();
while (iterator.hasNext()) {
this.parseItem (;
} catch (final IOException e) {
} catch (final ParseException e) {
} finally {
try {"SMWLISTSYNC", "Importer inserted poison pill in queue");
} catch (final InterruptedException e) {
private void parseItem(JSONObject jsonObject) {
try {
SMWListRow row = new SMWListRow();
Iterator<String> iterator = jsonObject.keySet().iterator();
while (iterator.hasNext()) {
String entryKey =;
Object value = jsonObject.get (entryKey);
String valueKey = "";
if (value instanceof java.lang.String) {
valueKey = value.toString();
} else if (value instanceof JSONArray) {
valueKey = jsonListAll ((JSONArray) value);
row.add (entryKey, valueKey);
} catch (final Exception e) {"SMWLISTSYNC", "import of entry failed");
private String jsonListAll(JSONArray value) {
String res = "";
Iterator<Object> iterator = value.listIterator();
while (iterator.hasNext()) {
Object val =;
res += val.toString()+",";
if (res.endsWith (",")) {
res = res.substring (0, res.length()-1);
return res;
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (final InterruptedException e) {
return null;

View File

@ -1,24 +0,0 @@
package net.yacy.contentcontrol;
import net.yacy.kelondro.blob.Tables;
public class SMWListRow {
private Tables.Data data;
public static final SMWListRow POISON = new SMWListRow();
public static final SMWListRow EMPTY = new SMWListRow();
public SMWListRow() { = new Tables.Data();
public void add (String key, String value) {, value);
public Tables.Data getData() {

View File

@ -1,201 +0,0 @@
package net.yacy.contentcontrol;
import java.nio.charset.StandardCharsets;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
public class SMWListSyncThread implements Runnable {
private final Switchboard sb;
private Boolean locked = false;
private String lastsync = "1900-01-01T01:00:00";
private String currenttimestamp = "1900-01-01T01:00:00";
private long offset = 0;
private final long limit = 500;
private long currentmax = 0;
private boolean runningjob = false;
private String targetList;
private String parameters;
private String query;
public static Boolean dirty = false;
public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) { = sb;
this.targetList = targetList;
this.parameters = parameters;
this.query = query;
if (purgeOnInit) {;
private final String wikiurlify (String s) {
String ret = s;
ret = ret.replace("-", "-2D");
ret = ret.replace("+", "-2B");
ret = ret.replace(" ", "-20");
ret = ret.replace("[", "-5B");
ret = ret.replace("]", "-5D");
ret = ret.replace(":", "-3A");
ret = ret.replace(">", "-3E");
ret = ret.replace("?", "-3F");
return ret;
public final void run() {
if (!this.locked) {
this.locked = true;
if ("contentcontrol.smwimport.enabled", false) == true) {
if (!this.runningjob) {
// we have to count all new elements first
try {
if (!"contentcontrol.smwimport.baseurl","").equals("")) {
URL urlCount;
urlCount = new URL(
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/offset%3D0"
+ "/limit%3D200000"
+ "/format%3Dystat");
String reply = UTF8.String(new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent).GETbytes(urlCount.toString(), null, null, false));
String overallcount = CommonPattern.COMMA.split(reply)[0];
String lastsyncstring = CommonPattern.COMMA.split(reply)[1];
this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) {"SMWLISTSYNC",
"import job counts "
+ this.currentmax
+ " new elements between "
+ this.lastsync + " and "
+ this.currenttimestamp);
this.currenttimestamp = this.lastsync;
this.runningjob = true;
this.lastsync = lastsyncstring;
this.offset = 0;
} else {
"No SMWimport URL defined");
} catch (final MalformedURLException e) {
// TODO Auto-generated catch block
} catch (final IOException e) {
// TODO Auto-generated catch block
} else {
// there are new elements to be imported"SMWLISTSYNC",
"importing max. " + this.limit
+ " elements at " + this.offset + " of "
+ this.currentmax + ", since "
+ this.currenttimestamp);
URL urlImport;
try {
if (!"contentcontrol.smwimport.baseurl","").equals("")) {
urlImport = new URL(
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/syntax%3Dobsolete"
+ "/offset%3D" + this.offset
+ "/limit%3D" + this.limit
+ "/format%3Djson");
this.offset += this.limit;
if (this.offset > this.currentmax) {
this.runningjob = false;
InputStreamReader reader = null;
try {
reader = new InputStreamReader(
urlImport.openStream(), StandardCharsets.UTF_8);
} catch (final Exception e) {
this.runningjob = false;
if (reader != null) {
SMWListImporterFormatObsolete smwListImporter = null;
try {
smwListImporter = new SMWListImporterFormatObsolete(
reader, 200);
} catch (final Exception e) {
// TODO: display an error message
this.runningjob = false;
Thread t;
SMWListRow row;
t = new Thread(smwListImporter,"SMW List Importer");
while ((row = smwListImporter.take()) != SMWListRow.POISON) {
if (row == SMWListRow.EMPTY) {
this.runningjob = false;
} else {
try {, row.getData());
dirty = true;
} catch (final Exception e) {
// TODO Auto-generated catch block
} catch (final MalformedURLException e2) {
// TODO Auto-generated catch block
this.locked = false;

View File

@ -26,31 +26,31 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import net.yacy.cora.federate.solr.instance.ServerShard;
import net.yacy.cora.util.ConcurrentLog;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CommonParams;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName());
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<Byte>(0, true);
public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<>(0, true);
// pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks
// this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks
static {
@ -158,8 +158,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public void deleteByIds(final Collection<String> ids) throws IOException {
if (this.server == null) return;
List<String> l = new ArrayList<String>();
for (String s: ids) l.add(s);
final List<String> l = new ArrayList<>();
for (final String s: ids) l.add(s);
synchronized (this.server) {
try {
this.server.deleteById(l, -1);
@ -247,7 +247,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException {
if (this.server == null) return;
for (SolrInputDocument solrdoc : solrdocs) {
for (final SolrInputDocument solrdoc : solrdocs) {
if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict"
synchronized (this.server) {
@ -278,8 +278,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
this.server.add(solrdocs, -1);
} catch (final Throwable ee) {
List<String> ids = new ArrayList<String>();
for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(;
final List<String> ids = new ArrayList<>();
for (final SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(;
log.warn(e.getMessage() + " IDs=" + ids.toString());
throw new IOException(ee);
@ -300,11 +300,11 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected");
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String q = params.get(CommonParams.Q);
String fq = params.get(CommonParams.FQ);
String sort = params.get(CommonParams.SORT);
String fl = params.get(CommonParams.FL);
String threadname = Thread.currentThread().getName();
final String q = params.get(CommonParams.Q);
final String fq = params.get(CommonParams.FQ);
final String sort = params.get(CommonParams.SORT);
final String fl = params.get(CommonParams.FL);
final String threadname = Thread.currentThread().getName();
QueryResponse rsp;
int retry = 0;
Throwable error = null;
@ -322,7 +322,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
clearCaches(); // prevent further OOM if this was caused by OOM
ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq));
try {Thread.sleep(1000);} catch (InterruptedException e) {}
try {Thread.sleep(1000);} catch (final InterruptedException e) {}
throw new IOException("Error executing query", error);
@ -342,10 +342,10 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public int getSegmentCount() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
NamedList<Object> info = lukeResponse.getIndexInfo();
final LukeResponse lukeResponse = getIndexBrowser(false);
final NamedList<Object> info = lukeResponse.getIndexInfo();
if (info == null) return 0;
Integer segmentCount = (Integer) info.get("segmentCount");
final Integer segmentCount = (Integer) info.get("segmentCount");
if (segmentCount == null) return 1;
return segmentCount.intValue();
} catch (final Throwable e) {
@ -363,19 +363,19 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
if (this.server instanceof ServerShard) {
// the server can be a single shard; we don't know here
// to test that, we submit requests to bots variants
if (useluke == 1) return getSizeLukeRequest();
if (useluke == -1) return getSizeQueryRequest();
long ls = getSizeLukeRequest();
long qs = getSizeQueryRequest();
if (this.useluke == 1) return getSizeLukeRequest();
if (this.useluke == -1) return getSizeQueryRequest();
final long ls = getSizeLukeRequest();
final long qs = getSizeQueryRequest();
if (ls == 0 && qs == 0) {
// we don't know if this is caused by an error or not; don't change the useluke value
return 0;
if (ls == qs) {
useluke = 1;
this.useluke = 1;
return ls;
useluke = -1;
this.useluke = -1;
return qs;
return getSizeLukeRequest();
@ -398,9 +398,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
private long getSizeLukeRequest() {
if (this.server == null) return 0;
try {
LukeResponse lukeResponse = getIndexBrowser(false);
final LukeResponse lukeResponse = getIndexBrowser(false);
if (lukeResponse == null) return 0;
Integer numDocs = lukeResponse.getNumDocs();
final Integer numDocs = lukeResponse.getNumDocs();
if (numDocs == null) return 0;
return numDocs.longValue();
} catch (final Throwable e) {
@ -419,7 +419,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
LukeResponse lukeResponse = null;
try {
lukeResponse = lukeRequest.process(this.server);
} catch (IOException e) {
} catch (final IOException e) {
throw new SolrServerException(e.getMessage());
return lukeResponse;

View File

@ -27,10 +27,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
@ -38,17 +34,21 @@ import org.apache.solr.core.SolrCore;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.kelondro.util.MemoryControl;
public class EmbeddedInstance implements SolrInstance {
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
// additional a optional (or for 32bit systems is copied
// additional a optional (or for 32bit systems is copied
private CoreContainer coreContainer;
private String defaultCoreName;
private SolrCore defaultCore;
private SolrClient defaultCoreServer;
private File containerPath;
private Map<String, SolrCore> cores;
private Map<String, SolrClient> server;
private final String defaultCoreName;
private final SolrCore defaultCore;
private final SolrClient defaultCoreServer;
private final File containerPath;
private final Map<String, SolrCore> cores;
private final Map<String, SolrClient> server;
public EmbeddedInstance(final File solr_config, final File containerPath, String givenDefaultCoreName, String[] initializeCoreNames) throws IOException {
@ -56,30 +56,30 @@ public class EmbeddedInstance implements SolrInstance {
this.containerPath = containerPath;
// ensure that default core path exists
File defaultCorePath = new File(containerPath, givenDefaultCoreName);
final File defaultCorePath = new File(containerPath, givenDefaultCoreName);
if (!defaultCorePath.exists()) defaultCorePath.mkdirs();
// migrate old conf directory
File oldConf = new File(containerPath, "conf");
File confDir = new File(defaultCorePath, "conf");
final File oldConf = new File(containerPath, "conf");
final File confDir = new File(defaultCorePath, "conf");
if (oldConf.exists()) oldConf.renameTo(confDir);
// migrate old data directory
File oldData = new File(containerPath, "data");
File dataDir = new File(defaultCorePath, "data");
final File oldData = new File(containerPath, "data");
final File dataDir = new File(defaultCorePath, "data");
if (oldData.exists()) oldData.renameTo(dataDir);
// create index subdirectory in data if it does not exist
File indexDir = new File(dataDir, "index");
final File indexDir = new File(dataDir, "index");
if (!indexDir.exists()) indexDir.mkdirs();
// initialize the cores' configuration
for (String coreName: initializeCoreNames) {
for (final String coreName: initializeCoreNames) {
initializeCoreConf(solr_config, containerPath, coreName);
// initialize the coreContainer
File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
final File configFile = new File(solr_config, "solr.xml"); // the configuration file for all cores
this.coreContainer = CoreContainer.createAndLoad(containerPath.toPath(), configFile.toPath()); // this may take indefinitely long if solr files are broken
if (this.coreContainer == null) throw new IOException("cannot create core container dir = " + containerPath + ", configFile = " + configFile);
@ -94,9 +94,9 @@ public class EmbeddedInstance implements SolrInstance {
this.defaultCoreServer = new EmbeddedSolrServer(this.coreContainer, this.defaultCoreName);
// initialize core cache
this.cores = new ConcurrentHashMap<String, SolrCore>();
this.cores = new ConcurrentHashMap<>();
this.cores.put(this.defaultCoreName, this.defaultCore);
this.server = new ConcurrentHashMap<String, SolrClient>();
this.server = new ConcurrentHashMap<>();
this.server.put(this.defaultCoreName, this.defaultCoreServer);
@ -113,42 +113,42 @@ public class EmbeddedInstance implements SolrInstance {
private static void initializeCoreConf(final File solr_config, final File containerPath, String coreName) {
// ensure that default core path exists
File corePath = new File(containerPath, coreName);
final File corePath = new File(containerPath, coreName);
if (!corePath.exists()) corePath.mkdirs();
// check if exists in each path (thats new in Solr 5.0)
File core_properties = new File(corePath, "");
final File core_properties = new File(corePath, "");
if (!core_properties.exists()) {
// create the file
try (
/* Automatically closed by this try-with-resources statement */
FileOutputStream fos = new FileOutputStream(core_properties);
) {
/* Automatically closed by this try-with-resources statement */
FileOutputStream fos = new FileOutputStream(core_properties);
) {
fos.write(ASCII.getBytes("name=" + coreName + "\n"));
fos.write(ASCII.getBytes("collection=${collection:" + coreName + "}\n"));
} catch (IOException e) {
} catch (final IOException e) {
// ensure necessary subpaths exist
File conf = new File(corePath, "conf");
final File conf = new File(corePath, "conf");
File data = new File(corePath, "data");
final File data = new File(corePath, "data");
// (over-)write configuration into conf path
File source, target;
for (String cf: confFiles) {
for (final String cf: confFiles) {
source = new File(solr_config, cf);
if (source.isDirectory()) {
target = new File(conf, cf);
for (String cfl: source.list()) {
for (final String cfl: source.list()) {
try {
Files.copy(new File(source, cfl), new File(target, cfl));
} catch (final IOException e) {
@ -168,7 +168,7 @@ public class EmbeddedInstance implements SolrInstance {
// copy the
// for 32bit systems (os.arch name not containing '64') take the as if exists
String os = System.getProperty("os.arch");
final String os = System.getProperty("os.arch");
if (os.contains("64")) {
source = new File(solr_config, "");
} else {
@ -242,7 +242,7 @@ public class EmbeddedInstance implements SolrInstance {
public synchronized void close() {
for (SolrCore core: cores.values()) core.close();
for (final SolrCore core: this.cores.values()) core.close();
if (this.coreContainer != null) try {
this.coreContainer = null;

View File

@ -36,7 +36,6 @@ import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
@ -360,13 +359,13 @@ public final class CrawlStacker implements WorkflowTask<Request>{
final boolean proxy = (entry.initiator() == null || entry.initiator().length == 0 || ASCII.String(entry.initiator()).equals("------------")) && profile.handle().equals(this.crawler.defaultProxyProfile.handle());
final boolean remote = profile.handle().equals(this.crawler.defaultRemoteProfile.handle());
final boolean global =
(profile.remoteIndexing()) /* granted */ &&
(entry.depth() == profile.depth()) /* leaf node */ &&
//(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
(this.peers.mySeed().isSenior()) ||
) /* qualified */;
(profile.remoteIndexing()) /* granted */ &&
(entry.depth() == profile.depth()) /* leaf node */ &&
//(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
(this.peers.mySeed().isSenior()) ||
) /* qualified */;
if (!local && !global && !remote && !proxy) {
error = "URL '" + entry.url().toString() + "' cannot be crawled. initiator = " + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", profile.handle = " + profile.handle();
@ -424,7 +423,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (dbocc != null) {
String urls = url.toNormalform(false);
final String urls = url.toNormalform(false);
final long oldDate = this.indexSegment.getLoadTime(url.hash());
// deny urls that exceed allowed number of occurrences
@ -441,7 +440,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (this.log.isFine()) this.log.fine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + maxAllowedPagesPerDomain + " is allowed.");
return "result stack domain counter exceeded (test by domainCount)";
//final Long oldDate = oldEntry == null ? null :;
@ -453,7 +452,7 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (recrawl) {
if (CrawlStacker.log.isFine())
CrawlStacker.log.fine("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldDate) / 60000 / 60 / 24) + " days ago.");
((System.currentTimeMillis() - oldDate) / 60000 / 60 / 24) + " days ago.");
} else {
return CRAWL_REJECT_REASON_DOUBLE_IN_PREFIX + ": local index, recrawl rejected. Document date = "
+ ISO8601Formatter.FORMATTER.format(new Date(oldDate)) + " is not older than crawl profile recrawl minimum date = "
@ -574,26 +573,6 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
if (!Switchboard.getSwitchboard()
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
final FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null) {
if (!f.isListed(url, null)) {
return "the url '"
+ url
+ "' does not belong to the network mandatory filter list";
final boolean local = url.isLocal();
if (this.acceptLocalURLs && local) return null;
if (this.acceptGlobalURLs && !local) return null;
@ -604,8 +583,8 @@ public final class CrawlStacker implements WorkflowTask<Request>{
//assert local == yacyURL.isLocalDomain(url.hash()); // TODO: remove the dnsResolve above!
final InetAddress ia = Domains.dnsResolve(host);
return (local) ?
("the host '" + host + "' is local, but local addresses are not accepted: " + ((ia == null) ? "DNS lookup resulted in null (unknown host name)" : ia.getHostAddress())) :
("the host '" + host + "' is global, but global addresses are not accepted: " + ((ia == null) ? "null" : ia.getHostAddress()));
("the host '" + host + "' is local, but local addresses are not accepted: " + ((ia == null) ? "DNS lookup resulted in null (unknown host name)" : ia.getHostAddress())) :
("the host '" + host + "' is global, but global addresses are not accepted: " + ((ia == null) ? "null" : ia.getHostAddress()));
public String urlInAcceptedDomainHash(final byte[] urlhash) {
@ -617,8 +596,8 @@ public final class CrawlStacker implements WorkflowTask<Request>{
if (this.acceptLocalURLs && local) return null;
if (this.acceptGlobalURLs && !local) return null;
return (local) ?
("the urlhash '" + ASCII.String(urlhash) + "' is local, but local addresses are not accepted") :
("the urlhash '" + ASCII.String(urlhash) + "' is global, but global addresses are not accepted");
("the urlhash '" + ASCII.String(urlhash) + "' is local, but local addresses are not accepted") :
("the urlhash '" + ASCII.String(urlhash) + "' is global, but global addresses are not accepted");
public boolean acceptLocalURLs() {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff