mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Full redesign of solr connection architecture. This was done to support
multiple solr cores instead of just one. Therefore it is now necessary to distinguish between a solr server connection (called an 'Instance') and a connection to a single solr core. One Instance may now have multiple connector classes assigned to it, each connecting to a single core. To support multiple cores it is also necessary to distinguish between the connection configuration and the configuration of the index schema. We will have multiple schema configurations in the future, one for each solr core. As a result, the IndexFederated servlet had to be split into two parts; the new servlet for the schema editor is now the IndexSchema servlet.
This commit is contained in:
parent
4111606654
commit
b6de1f42dc
|
@ -72,7 +72,7 @@ public class ConfigHeuristics_p {
|
|||
}
|
||||
if (post.containsKey("opensearch_off")) sb.setConfig("heuristic.opensearch", false);
|
||||
if (post.containsKey("discoverosd")) {
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
if (!metafieldNOTavailable) {
|
||||
OpenSearchConnector osc = new OpenSearchConnector(sb, false);
|
||||
if (osc.discoverFromSolrIndex(sb)) {
|
||||
|
@ -102,24 +102,24 @@ public class ConfigHeuristics_p {
|
|||
}
|
||||
|
||||
if (post.containsKey("setopensearch")) {
|
||||
// read index scheme table flags
|
||||
// read index schema table flags
|
||||
writeopensearchcfg (sb,post);
|
||||
}
|
||||
|
||||
if (post.containsKey("switchsolrfieldson")) {
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
if (metafieldNOTavailable) {
|
||||
ConfigurationSet.Entry entry;
|
||||
entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.outboundlinks_tag_txt.name());
|
||||
entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.outboundlinks_tag_txt.name());
|
||||
if (entry != null && !entry.enabled()) {
|
||||
entry.setEnable(true);
|
||||
}
|
||||
entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.inboundlinks_tag_txt.name());
|
||||
entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.inboundlinks_tag_txt.name());
|
||||
if (entry != null && !entry.enabled()) {
|
||||
entry.setEnable(true);
|
||||
}
|
||||
try {
|
||||
sb.index.fulltext().getSolrScheme().commit();
|
||||
sb.index.fulltext().getSolrSchema().commit();
|
||||
} catch (IOException ex) {}
|
||||
}
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ public class ConfigHeuristics_p {
|
|||
}
|
||||
}
|
||||
|
||||
final boolean showmetafieldbutton = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
final boolean showmetafieldbutton = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
if (showmetafieldbutton) prop.put("osdsolrfieldswitch",1);
|
||||
prop.put("site.checked", sb.getConfigBool("heuristic.site", false) ? 1 : 0);
|
||||
prop.put("searchresult.checked", sb.getConfigBool("heuristic.searchresults", false) ? 1 : 0);
|
||||
|
@ -175,7 +175,7 @@ public class ConfigHeuristics_p {
|
|||
}
|
||||
|
||||
private static void writeopensearchcfg(final Switchboard sb, final serverObjects post) {
|
||||
// read index scheme table flags
|
||||
// read index schema table flags
|
||||
|
||||
final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
|
||||
ConfigurationSet cfg = new ConfigurationSet(f);
|
||||
|
|
|
@ -55,7 +55,7 @@ public class CrawlResults {
|
|||
final serverObjects prop = new serverObjects();
|
||||
|
||||
int lines = 500;
|
||||
boolean showCollection = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt);
|
||||
boolean showCollection = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt);
|
||||
boolean showInit = env.getConfigBool("IndexMonitorInit", false);
|
||||
boolean showExec = env.getConfigBool("IndexMonitorExec", false);
|
||||
boolean showDate = env.getConfigBool("IndexMonitorDate", true);
|
||||
|
|
|
@ -76,7 +76,7 @@ public class CrawlStartExpert_p {
|
|||
prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0");
|
||||
prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0");
|
||||
|
||||
boolean collectionEnabled = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt);
|
||||
boolean collectionEnabled = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt);
|
||||
prop.put("collectionEnabled", collectionEnabled ? 1 : 0);
|
||||
prop.put("collection", collectionEnabled ? "user" : "");
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Federated Index</title>
|
||||
<title>YaCy '#[clientname]#': Remote Solr Configuration</title>
|
||||
#%env/templates/metas.template%#
|
||||
</head>
|
||||
<body id="IndexFederated_p">
|
||||
|
@ -12,7 +12,7 @@
|
|||
</div>
|
||||
#%env/templates/header.template%#
|
||||
#%env/templates/submenuIndexControl.template%#
|
||||
<h2>Federated Index</h2>
|
||||
<h2>Remote Solr Configuration</h2>
|
||||
<p>
|
||||
YaCy supports multiple index storage locations. At this time only the YaCy-internal search index can be used for the YaCy search interface
|
||||
A Solr index storage location is optional. The local index storage location can be disabled.
|
||||
|
@ -64,38 +64,12 @@
|
|||
<dt class="TableCellDark">Solr URL(s)</dt>
|
||||
<dd><textarea rows="2" cols="80" name="solr.indexing.url" id="solr.indexing.url"/>#[solr.indexing.url]#</textarea><br/>
|
||||
You can set one or more Solr targets here which are accessed as a shard. For several targets, list them using a ',' (comma) as separator.</dd>
|
||||
<dt class="TableCellDark">Commit-Within (milliseconds)</dt>
|
||||
<dd><input type="text" size="6" maxlength="6" value="#[solr.indexing.commitWithinMs]#" name="solr.indexing.commitWithinMs" id="solr.indexing.commitWithinMs"/> (increase this value to i.e. 180000 - 3 minutes - for more performance)</dd>
|
||||
<dt class="TableCellDark">Lazy Value Initialization</dt>
|
||||
<dd><input type="checkbox" name="solr.indexing.lazy" id="solr.indexing.lazy" #(solr.indexing.lazy.checked)#:: checked="checked"#(/solr.indexing.lazy.checked)# /> (if checked, only non-zero values and non-empty strings are written)</dd>
|
||||
<dt class="TableCellDark">Sharding Method</dt>
|
||||
<dd><input type="text" size="50" maxlength="50" value="#[solr.indexing.sharding]#" name="solr.indexing.sharding" id="solr.indexing.sharding" disabled="disabled"/></dd>
|
||||
<dt class="TableCellDark">Scheme</dt>
|
||||
<dd><input type="text" size="50" maxlength="50" value="#[solr.indexing.schemefile]#" name="solr.indexing.schemefile" id="solr.indexing.schemefile" disabled="disabled"/></dd>
|
||||
<dt></dt><dd><input type="submit" name="set" value="Set" /></dd>
|
||||
</dl>
|
||||
</div>
|
||||
<div>
|
||||
<h3>Index Scheme</h3><p>If you use a custom Solr schema you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name</p>
|
||||
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
|
||||
<tr class="TableHeader" valign="bottom">
|
||||
<td>Active</td>
|
||||
<td>Attribute</td>
|
||||
<td>Custom Solr Field Name</td>
|
||||
<td>Comment</td>
|
||||
</tr>
|
||||
#{scheme}#
|
||||
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
|
||||
<td align="center"><input type="checkbox" name="scheme_#[key]#" value="checked" #(checked)#::checked="checked"#(/checked)#/></td>
|
||||
<td align="left">#[key]#</td>
|
||||
<td align="left"><input type="text" name="scheme_solrfieldname_#[key]#" value="#[solrfieldname]#"/></td>
|
||||
<td align="left">#[comment]#</td>
|
||||
</tr>
|
||||
#{/scheme}#
|
||||
</table>
|
||||
</div>
|
||||
</fieldset>
|
||||
<input type="submit" name="set" value="Set" />
|
||||
</fieldset>
|
||||
</form>
|
||||
|
||||
#%env/templates/footer.template%#
|
||||
|
|
|
@ -3,10 +3,6 @@
|
|||
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||
* First released 25.05.2011 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $
|
||||
* $LastChangedRevision: 7653 $
|
||||
* $LastChangedBy: orbiter $
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
|
@ -26,17 +22,16 @@ import java.io.BufferedReader;
|
|||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
import net.yacy.cora.document.UTF8;
|
||||
import net.yacy.cora.federate.solr.YaCySchema;
|
||||
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.ShardSelection;
|
||||
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.federate.yacy.ConfigurationSet;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.util.OS;
|
||||
|
@ -95,7 +90,6 @@ public class IndexFederated_p {
|
|||
String solrurls = post.get("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr"));
|
||||
final boolean solrRemoteIsOnAfterwards = post.getBoolean("solr.indexing.solrremote") & solrurls.length() > 0;
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, solrRemoteIsOnAfterwards);
|
||||
boolean lazy = post.getBoolean("solr.indexing.lazy");
|
||||
final BufferedReader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(UTF8.getBytes(solrurls))));
|
||||
final StringBuilder s = new StringBuilder();
|
||||
String s0;
|
||||
|
@ -113,11 +107,8 @@ public class IndexFederated_p {
|
|||
}
|
||||
solrurls = s.toString().trim();
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, solrurls);
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, lazy);
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, post.get("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5")));
|
||||
final String schemename = post.get("solr.indexing.schemefile", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list"));
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, schemename);
|
||||
|
||||
|
||||
if (solrRemoteWasOn && !solrRemoteIsOnAfterwards) {
|
||||
// switch off
|
||||
try {
|
||||
|
@ -133,7 +124,8 @@ public class IndexFederated_p {
|
|||
final boolean usesolr = sb.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) & solrurls.length() > 0;
|
||||
try {
|
||||
if (usesolr) {
|
||||
SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true);
|
||||
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
|
||||
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
|
||||
sb.index.fulltext().connectRemoteSolr(solr);
|
||||
} else {
|
||||
sb.index.fulltext().disconnectRemoteSolr();
|
||||
|
@ -149,35 +141,6 @@ public class IndexFederated_p {
|
|||
} catch (SolrException e) {
|
||||
Log.logSevere("IndexFederated_p", "change of solr connection failed", e);
|
||||
}
|
||||
|
||||
// read index scheme table flags
|
||||
final Iterator<ConfigurationSet.Entry> i = sb.index.fulltext().getSolrScheme().entryIterator();
|
||||
ConfigurationSet.Entry entry;
|
||||
boolean modified = false; // flag to remember changes
|
||||
while (i.hasNext()) {
|
||||
entry = i.next();
|
||||
final String v = post.get("scheme_" + entry.key());
|
||||
final String sfn = post.get("scheme_solrfieldname_" + entry.key());
|
||||
if (sfn != null ) {
|
||||
// set custom solr field name
|
||||
if (!sfn.equals(entry.getValue())) {
|
||||
entry.setValue(sfn);
|
||||
modified = true;
|
||||
}
|
||||
}
|
||||
// set enable flag
|
||||
final boolean c = v != null && v.equals("checked");
|
||||
if (entry.enabled() != c) {
|
||||
entry.setEnable(c);
|
||||
modified = true;
|
||||
}
|
||||
}
|
||||
if (modified) { // save settings to config file if modified
|
||||
try {
|
||||
sb.index.fulltext().getSolrScheme().commit();
|
||||
modified = false;
|
||||
} catch (IOException ex) {}
|
||||
}
|
||||
}
|
||||
|
||||
// show solr host table
|
||||
|
@ -187,7 +150,7 @@ public class IndexFederated_p {
|
|||
prop.put("table", 1);
|
||||
final SolrConnector solr = sb.index.fulltext().getRemoteSolr();
|
||||
final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((RemoteSolrConnector) solr).getSize()};
|
||||
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((RemoteSolrConnector) solr).getAdminInterface()};
|
||||
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SolrRemoteInstance) ((RemoteSolrConnector) solr).getInstance()).getAdminInterface()};
|
||||
boolean dark = false;
|
||||
for (int i = 0; i < size.length; i++) {
|
||||
prop.put("table_list_" + i + "_dark", dark ? 1 : 0); dark = !dark;
|
||||
|
@ -197,34 +160,12 @@ public class IndexFederated_p {
|
|||
prop.put("table_list", size.length);
|
||||
}
|
||||
|
||||
// write scheme
|
||||
final String schemename = sb.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list");
|
||||
|
||||
int c = 0;
|
||||
boolean dark = false;
|
||||
// use enum SolrField to keep defined order
|
||||
for(YaCySchema field : YaCySchema.values()) {
|
||||
prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
|
||||
prop.put("scheme_" + c + "_checked", sb.index.fulltext().getSolrScheme().contains(field.name()) ? 1 : 0);
|
||||
prop.putHTML("scheme_" + c + "_key", field.name());
|
||||
prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
|
||||
if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment());
|
||||
c++;
|
||||
}
|
||||
prop.put("scheme", c);
|
||||
|
||||
// fill attribute fields
|
||||
// allowed values are: classic, solr, off
|
||||
// federated.service.yacy.indexing.engine = classic
|
||||
|
||||
prop.put(SwitchboardConstants.CORE_SERVICE_FULLTEXT + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false) ? 1 : 0);
|
||||
prop.put(SwitchboardConstants.CORE_SERVICE_RWI + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, false) ? 1 : 0);
|
||||
prop.put(SwitchboardConstants.CORE_SERVICE_CITATION + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, false) ? 1 : 0);
|
||||
prop.put("solr.indexing.solrremote.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) ? 1 : 0);
|
||||
prop.put("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr").replace(",", "\n"));
|
||||
prop.put("solr.indexing.lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 1 : 0);
|
||||
prop.put("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5"));
|
||||
prop.put("solr.indexing.schemefile", schemename);
|
||||
|
||||
if ((sb.index.fulltext().connectedURLDb())) {
|
||||
prop.put("migrateUrlDbtoSolr", 1);
|
||||
|
|
48
htroot/IndexSchema_p.html
Normal file
48
htroot/IndexSchema_p.html
Normal file
|
@ -0,0 +1,48 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>YaCy '#[clientname]#': Solr Schema Editor</title>
|
||||
#%env/templates/metas.template%#
|
||||
</head>
|
||||
<body id="IndexSchema_p">
|
||||
<div id="api">
|
||||
<a href="/api/schema.xml">
|
||||
<img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
|
||||
<span>The solr schema can also be retrieved as xml here. Click the API icon to see the xml. Just copy this xml to solr/conf/schema.xml to configure solr.</span>
|
||||
</div>
|
||||
#%env/templates/header.template%#
|
||||
#%env/templates/submenuIndexControl.template%#
|
||||
<h2>Solr Schema Editor</h2>
|
||||
<p>If you use a custom Solr schema you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name</p>
|
||||
|
||||
<form action="IndexSchema_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
|
||||
<fieldset>
|
||||
<div>
|
||||
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
|
||||
<tr class="TableHeader" valign="bottom">
|
||||
<td>Active</td>
|
||||
<td>Attribute</td>
|
||||
<td>Custom Solr Field Name</td>
|
||||
<td>Comment</td>
|
||||
</tr>
|
||||
#{schema}#
|
||||
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
|
||||
<td align="center"><input type="checkbox" name="schema_#[key]#" value="checked" #(checked)#::checked="checked"#(/checked)#/></td>
|
||||
<td align="left">#[key]#</td>
|
||||
<td align="left"><input type="text" name="schema_solrfieldname_#[key]#" value="#[solrfieldname]#"/></td>
|
||||
<td align="left">#[comment]#</td>
|
||||
</tr>
|
||||
#{/schema}#
|
||||
</table>
|
||||
<dl>
|
||||
<dt class="TableCellDark">Lazy Value Initialization</dt>
|
||||
<dd><input type="checkbox" name="lazy" id="lazy" #(lazy.checked)#:: checked="checked"#(/lazy.checked)# /> (if checked, only non-zero values and non-empty strings are written)</dd>
|
||||
</dl>
|
||||
</div>
|
||||
</fieldset>
|
||||
<input type="submit" name="set" value="Set" />
|
||||
</form>
|
||||
|
||||
#%env/templates/footer.template%#
|
||||
</body>
|
||||
</html>
|
92
htroot/IndexSchema_p.java
Normal file
92
htroot/IndexSchema_p.java
Normal file
|
@ -0,0 +1,92 @@
|
|||
/**
|
||||
* IndexSchemaFulltext_p
|
||||
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||
* First released 13.02.2013 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.yacy.cora.federate.solr.YaCySchema;
|
||||
import net.yacy.cora.federate.yacy.ConfigurationSet;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import net.yacy.server.serverObjects;
|
||||
import net.yacy.server.serverSwitch;
|
||||
|
||||
public class IndexSchema_p {
|
||||
|
||||
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final serverObjects prop = new serverObjects();
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
if (post != null && post.containsKey("set")) {
|
||||
// read index schema table flags
|
||||
final Iterator<ConfigurationSet.Entry> i = sb.index.fulltext().getSolrSchema().entryIterator();
|
||||
ConfigurationSet.Entry entry;
|
||||
boolean modified = false; // flag to remember changes
|
||||
while (i.hasNext()) {
|
||||
entry = i.next();
|
||||
final String v = post.get("schema_" + entry.key());
|
||||
final String sfn = post.get("schema_solrfieldname_" + entry.key());
|
||||
if (sfn != null ) {
|
||||
// set custom solr field name
|
||||
if (!sfn.equals(entry.getValue())) {
|
||||
entry.setValue(sfn);
|
||||
modified = true;
|
||||
}
|
||||
}
|
||||
// set enable flag
|
||||
final boolean c = v != null && v.equals("checked");
|
||||
if (entry.enabled() != c) {
|
||||
entry.setEnable(c);
|
||||
modified = true;
|
||||
}
|
||||
}
|
||||
if (modified) { // save settings to config file if modified
|
||||
try {
|
||||
sb.index.fulltext().getSolrSchema().commit();
|
||||
modified = false;
|
||||
} catch (IOException ex) {}
|
||||
}
|
||||
|
||||
boolean lazy = post.getBoolean("lazy");
|
||||
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, lazy);
|
||||
|
||||
}
|
||||
|
||||
int c = 0;
|
||||
boolean dark = false;
|
||||
// use enum SolrField to keep defined order
|
||||
for(YaCySchema field : YaCySchema.values()) {
|
||||
prop.put("schema_" + c + "_dark", dark ? 1 : 0); dark = !dark;
|
||||
prop.put("schema_" + c + "_checked", sb.index.fulltext().getSolrSchema().contains(field.name()) ? 1 : 0);
|
||||
prop.putHTML("schema_" + c + "_key", field.name());
|
||||
prop.putHTML("schema_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
|
||||
if (field.getComment() != null) prop.putHTML("schema_" + c + "_comment",field.getComment());
|
||||
c++;
|
||||
}
|
||||
prop.put("schema", c);
|
||||
|
||||
prop.put("lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 1 : 0);
|
||||
|
||||
// return rewrite properties
|
||||
return prop;
|
||||
}
|
||||
}
|
|
@ -39,11 +39,11 @@ public class schema {
|
|||
final servletProperties prop = new servletProperties();
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
// write scheme
|
||||
// write schema
|
||||
int c = 0;
|
||||
SolrConfiguration solrScheme = sb.index.fulltext().getSolrScheme();
|
||||
SolrConfiguration solrSchema = sb.index.fulltext().getSolrSchema();
|
||||
for (YaCySchema field : YaCySchema.values()) {
|
||||
if (solrScheme.contains(field.name())) {
|
||||
if (solrSchema.contains(field.name())) {
|
||||
addField(prop, c, field);
|
||||
c++;
|
||||
}
|
||||
|
@ -51,13 +51,13 @@ public class schema {
|
|||
//if (solrScheme.contains(YaCySchema.author)) {addField(prop, c, YaCySchema.author_sxt);}
|
||||
prop.put("fields", c);
|
||||
|
||||
prop.put("copyFieldAuthor", solrScheme.contains(YaCySchema.author) ? 1 : 0);
|
||||
prop.put("copyFieldAuthor", solrSchema.contains(YaCySchema.author) ? 1 : 0);
|
||||
|
||||
prop.put("solruniquekey",YaCySchema.id.getSolrFieldName());
|
||||
prop.put("solrdefaultsearchfield",
|
||||
solrScheme.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() :
|
||||
solrScheme.contains(YaCySchema.fuzzy_signature_text_t) ? YaCySchema.fuzzy_signature_text_t.getSolrFieldName() :
|
||||
solrScheme.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() :
|
||||
solrSchema.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() :
|
||||
solrSchema.contains(YaCySchema.fuzzy_signature_text_t) ? YaCySchema.fuzzy_signature_text_t.getSolrFieldName() :
|
||||
solrSchema.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() :
|
||||
YaCySchema.id.getSolrFieldName()
|
||||
);
|
||||
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
<h3>Index Administration</h3>
|
||||
<ul class="SubMenu">
|
||||
<li><a href="/IndexControlURLs_p.html" class="MenuItemLink lock">Database Administration</a></li>
|
||||
<li><a href="/IndexFederated_p.html" class="MenuItemLink lock">Federated Solr Index</a></li>
|
||||
<li><a href="/IndexSchema_p.html" class="MenuItemLink lock">Solr Schema Editor</a></li>
|
||||
<li><a href="/IndexFederated_p.html" class="MenuItemLink lock">Remote Solr Configuration</a></li>
|
||||
#(p2p)#::<li><a href="/IndexControlRWIs_p.html" class="MenuItemLink lock">Reverse Word Index Administration</a></li>#(/p2p)#
|
||||
<!--<li><a href="/IndexControlCleaner_p.html" class="MenuItemLink lock">Index Cleaner</a></li>-->
|
||||
<li><a href="/ConfigHTCache_p.html" class="MenuItemLink lock">Web Cache</a></li>
|
||||
|
|
|
@ -114,7 +114,7 @@ public class searchresult {
|
|||
|
||||
// get a solr query string
|
||||
QueryGoal qg = new QueryGoal(originalQuery, originalQuery);
|
||||
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrScheme());
|
||||
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrSchema());
|
||||
post.put("defType", "edismax");
|
||||
post.put(CommonParams.Q, solrQ.toString());
|
||||
post.put(CommonParams.ROWS, post.remove("num"));
|
||||
|
|
|
@ -180,7 +180,7 @@ public class OpenSearchConnector {
|
|||
Log.logSevere("OpenSearchConnector.Discover", "Error on connecting to embedded Solr index");
|
||||
return false;
|
||||
}
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
|
||||
if (metafieldNOTavailable) {
|
||||
Log.logWarning("OpenSearchConnector.Discover", "Solr Schema field outboundlinks_tag_txt must be switched on");
|
||||
return false;
|
||||
|
|
|
@ -27,7 +27,7 @@ public interface Schema {
|
|||
* this shall be implemented as enum, thus shall have the name() method
|
||||
* @return the name of the enum constant
|
||||
*/
|
||||
public String name(); // default field name (according to SolCell default scheme) <= enum.name()
|
||||
public String name(); // default field name (according to SolCell default schema) <= enum.name()
|
||||
|
||||
public String getSolrFieldName(); // return the default or custom solr field name to use for solr requests
|
||||
|
||||
|
|
|
@ -352,24 +352,5 @@ public enum YaCySchema implements Schema {
|
|||
doc.setField(this.getSolrFieldName(), value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a SolrDocument to a SolrInputDocument.
|
||||
* This is useful if a document from the search index shall be modified and indexed again.
|
||||
* This shall be used as replacement of ClientUtils.toSolrInputDocument because we remove some fields
|
||||
* which are created automatically during the indexing process.
|
||||
* @param doc the solr document
|
||||
* @return a solr input document
|
||||
*/
|
||||
public static SolrInputDocument toSolrInputDocument(SolrDocument doc) {
|
||||
SolrInputDocument sid = new SolrInputDocument();
|
||||
Set<String> omitFields = new HashSet<String>();
|
||||
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_0_coordinate");
|
||||
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_1_coordinate");
|
||||
omitFields.add(YaCySchema.author_sxt.getSolrFieldName());
|
||||
for (String name: doc.getFieldNames()) {
|
||||
if (!omitFields.contains(name)) sid.addField(name, doc.getFieldValue(name), 1.0f);
|
||||
}
|
||||
return sid;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -213,12 +213,12 @@ public abstract class AbstractSolrConnector implements SolrConnector {
|
|||
params.setRows(0);
|
||||
params.setStart(0);
|
||||
params.setFacet(false);
|
||||
//params.setFields(YaCySchema.id.getSolrFieldName());
|
||||
params.setFields(YaCySchema.id.getSolrFieldName());
|
||||
|
||||
// query the server
|
||||
QueryResponse rsp = query(params);
|
||||
final SolrDocumentList docs = rsp.getResults();
|
||||
return docs.getNumFound();
|
||||
return docs == null ? 0 : docs.getNumFound();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,15 +21,16 @@
|
|||
|
||||
package net.yacy.cora.federate.solr.connector;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.federate.solr.instance.SolrEmbeddedInstance;
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
@ -39,113 +40,113 @@ import org.apache.solr.common.util.SimpleOrderedMap;
|
|||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.QueryComponent;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SearchHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequestBase;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import org.apache.solr.servlet.SolrRequestParsers;
|
||||
|
||||
public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector {
|
||||
|
||||
public static final String SELECT = "/select";
|
||||
public static final String CONTEXT = "/solr";
|
||||
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
|
||||
|
||||
private CoreContainer cores;
|
||||
private String defaultCoreName;
|
||||
private SolrCore defaultCore;
|
||||
|
||||
private final SearchHandler requestHandler;
|
||||
private final File storagePath;
|
||||
private final SolrEmbeddedInstance instance;
|
||||
private SolrCore core;
|
||||
|
||||
public EmbeddedSolrConnector(File storagePath, File solr_config) throws IOException {
|
||||
public EmbeddedSolrConnector(SolrEmbeddedInstance instance) {
|
||||
super();
|
||||
// copy the solrconfig.xml to the storage path
|
||||
this.storagePath = storagePath;
|
||||
File conf = new File(storagePath, "conf");
|
||||
conf.mkdirs();
|
||||
File source, target;
|
||||
for (String cf: confFiles) {
|
||||
source = new File(solr_config, cf);
|
||||
if (source.isDirectory()) {
|
||||
target = new File(conf, cf);
|
||||
target.mkdirs();
|
||||
for (String cfl: source.list()) {
|
||||
try {
|
||||
Files.copy(new File(source, cfl), new File(target, cfl));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
target = new File(conf, cf);
|
||||
target.getParentFile().mkdirs();
|
||||
try {
|
||||
Files.copy(source, target);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.cores = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml")); // this may take indefinitely long if solr files are broken
|
||||
if (this.cores == null) {
|
||||
// try again
|
||||
System.gc();
|
||||
this.cores = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml"));
|
||||
}
|
||||
this.defaultCoreName = this.cores.getDefaultCoreName();
|
||||
Log.logInfo("EmbeddedSolrConnector", "detected default solr core: " + this.defaultCoreName);
|
||||
this.defaultCore = this.cores.getCore(this.defaultCoreName); // should be "collection1"
|
||||
if (this.defaultCore == null) {
|
||||
// try again
|
||||
Collection<SolrCore> cores = this.cores.getCores();
|
||||
if (cores.size() > 0) {
|
||||
this.defaultCore = cores.iterator().next();
|
||||
this.defaultCoreName = this.defaultCore.getName();
|
||||
}
|
||||
}
|
||||
if (this.defaultCore == null) {
|
||||
throw new IOException("cannot get the default core; available = " + MemoryControl.available() + ", free = " + MemoryControl.free());
|
||||
}
|
||||
final NamedList<Object> config = new NamedList<Object>();
|
||||
this.instance = instance;
|
||||
this.core = this.instance.getDefaultCore();
|
||||
this.requestHandler = new SearchHandler();
|
||||
this.requestHandler.init(config);
|
||||
this.requestHandler.inform(this.defaultCore);
|
||||
super.init(new EmbeddedSolrServer(this.cores, this.defaultCoreName));
|
||||
this.requestHandler.init(new NamedList<Object>());
|
||||
this.requestHandler.inform(this.core);
|
||||
super.init(this.instance.getDefaultServer());
|
||||
}
|
||||
|
||||
public EmbeddedSolrConnector(SolrEmbeddedInstance instance, String coreName) {
|
||||
super();
|
||||
this.instance = instance;
|
||||
this.core = this.instance.getCore(coreName);
|
||||
this.requestHandler = new SearchHandler();
|
||||
this.requestHandler.init(new NamedList<Object>());
|
||||
this.requestHandler.inform(this.core);
|
||||
super.init(this.instance.getServer(coreName));
|
||||
}
|
||||
|
||||
public File getStoragePath() {
|
||||
return this.storagePath;
|
||||
public SolrInstance getInstance() {
|
||||
return this.instance;
|
||||
}
|
||||
|
||||
public SolrCore getCore() {
|
||||
return this.defaultCore;
|
||||
return this.core;
|
||||
}
|
||||
|
||||
public SolrConfig getConfig() {
|
||||
return this.defaultCore.getSolrConfig();
|
||||
return this.core.getSolrConfig();
|
||||
}
|
||||
|
||||
private static final SolrRequestParsers _parser = new SolrRequestParsers(null);
|
||||
|
||||
/**
|
||||
* get the size of the index. We override the implementation in SolrServerConnector
|
||||
* because we can do this with more efficiently in a different way for embedded indexes.
|
||||
*/
|
||||
@Override
|
||||
public long getSize() {
|
||||
// do some magic here to prevent the super.getSize() call which is a bad hack
|
||||
return super.getSize();
|
||||
String threadname = Thread.currentThread().getName();
|
||||
Thread.currentThread().setName("solr query: size");
|
||||
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
|
||||
CoreContainer coreContainer = ess.getCoreContainer();
|
||||
String coreName = coreContainer.getDefaultCoreName();
|
||||
SolrCore core = coreContainer.getCore(coreName);
|
||||
if (core == null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "No such core: " + coreName);
|
||||
|
||||
try {
|
||||
SolrParams params = AbstractSolrConnector.catchSuccessQuery;
|
||||
QueryRequest request = new QueryRequest(AbstractSolrConnector.catchSuccessQuery);
|
||||
SolrQueryRequest req = _parser.buildRequestFrom(core, params, request.getContentStreams());
|
||||
String path = "/select";
|
||||
req.getContext().put("path", path);
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
|
||||
SolrRequestHandler handler = core.getRequestHandler(path);
|
||||
SearchHandler sh = (SearchHandler) handler;
|
||||
List<SearchComponent> components = sh.getComponents();
|
||||
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
|
||||
QueryComponent qc = (QueryComponent) components.get(0);
|
||||
qc.prepare(rb);
|
||||
qc.process(rb);
|
||||
qc.finishStage(rb);
|
||||
int hits = rb.getResults().docList.matches();
|
||||
if (req != null) req.close();
|
||||
core.close();
|
||||
SolrRequestInfo.clearRequestInfo();
|
||||
Thread.currentThread().setName(threadname);
|
||||
return hits;
|
||||
} catch (final Throwable e) {
|
||||
log.warn(e);
|
||||
Thread.currentThread().setName(threadname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
try {this.commit(false);} catch (Throwable e) {Log.logException(e);}
|
||||
try {super.close();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.defaultCore.close();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);}
|
||||
try {this.core.close();} catch (Throwable e) {Log.logException(e);}
|
||||
}
|
||||
|
||||
public SolrQueryRequest request(final SolrParams params) {
|
||||
SolrQueryRequest req = null;
|
||||
req = new SolrQueryRequestBase(this.defaultCore, params){};
|
||||
req = new SolrQueryRequestBase(this.core, params){};
|
||||
req.getContext().put("path", SELECT);
|
||||
req.getContext().put("webapp", CONTEXT);
|
||||
return req;
|
||||
|
|
|
@ -21,11 +21,14 @@
|
|||
package net.yacy.cora.federate.solr.connector;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import net.yacy.cora.federate.solr.YaCySchema;
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
import net.yacy.cora.storage.ARC;
|
||||
import net.yacy.cora.storage.ConcurrentARC;
|
||||
|
||||
|
@ -51,8 +54,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
|
||||
private final static Object EXIST = new Object();
|
||||
|
||||
private SolrConnector solr0;
|
||||
private SolrConnector solr1;
|
||||
private EmbeddedSolrConnector solr0;
|
||||
private ShardSolrConnector solr1;
|
||||
private int hitCacheMax, missCacheMax, partitions;
|
||||
private final Map<String, HitMissCache> hitMissCache;
|
||||
private final Map<String, ARC<String, Object>> fieldCache; // a map from a field name to a id-key/value object cache
|
||||
|
@ -105,12 +108,19 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* the usage of getInstance is not possible here, use getSolr0().getInstance() instead
|
||||
*/
|
||||
public SolrInstance getInstance() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public boolean isConnected0() {
|
||||
return this.solr0 != null;
|
||||
}
|
||||
|
||||
public void connect0(SolrConnector c) {
|
||||
public void connect0(EmbeddedSolrConnector c) {
|
||||
this.solr0 = c;
|
||||
}
|
||||
|
||||
|
@ -120,7 +130,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
|
||||
public void disconnect0() {
|
||||
if (this.solr0 == null) return;
|
||||
SolrInstance instance = this.solr0.getInstance();
|
||||
this.solr0.close();
|
||||
instance.close();
|
||||
this.solr0 = null;
|
||||
}
|
||||
|
||||
|
@ -128,7 +140,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
return this.solr1 != null;
|
||||
}
|
||||
|
||||
public void connect1(SolrConnector c) {
|
||||
public void connect1(ShardSolrConnector c) {
|
||||
this.solr1 = c;
|
||||
}
|
||||
|
||||
|
@ -138,6 +150,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
|
||||
public void disconnect1() {
|
||||
if (this.solr1 == null) return;
|
||||
// we cannot get the instance here because that is not applicable
|
||||
this.solr1.close();
|
||||
this.solr1 = null;
|
||||
}
|
||||
|
@ -165,8 +178,16 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
|
|||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
if (this.solr0 != null) this.solr0.close();
|
||||
if (this.solr1 != null) this.solr1.close();
|
||||
if (this.solr0 != null) {
|
||||
SolrInstance instance = this.solr0.getInstance();
|
||||
this.solr0.close();
|
||||
instance.close();
|
||||
}
|
||||
if (this.solr1 != null) {
|
||||
ArrayList<SolrRemoteInstance> instances = this.solr1.getInstances();
|
||||
this.solr1.close();
|
||||
for (SolrRemoteInstance instance: instances) instance.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,9 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
@ -37,20 +40,20 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
|
|||
private final AddWorker[] worker;
|
||||
private final SolrConnector solr;
|
||||
|
||||
public MultipleSolrConnector(final String url, final int connections) throws IOException {
|
||||
this.solr = new RemoteSolrConnector(url);
|
||||
public MultipleSolrConnector(final SolrRemoteInstance instance, final String corename, final int connections) {
|
||||
this.solr = new RemoteSolrConnector(instance, corename);
|
||||
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
|
||||
this.worker = new AddWorker[connections];
|
||||
for (int i = 0; i < connections; i++) {
|
||||
this.worker[i] = new AddWorker(url);
|
||||
this.worker[i] = new AddWorker(instance, corename);
|
||||
this.worker[i].start();
|
||||
}
|
||||
}
|
||||
|
||||
private class AddWorker extends Thread {
|
||||
private final SolrConnector solr;
|
||||
public AddWorker(final String url) throws IOException {
|
||||
this.solr = new RemoteSolrConnector(url);
|
||||
public AddWorker(final SolrRemoteInstance instance, final String corename) {
|
||||
this.solr = new RemoteSolrConnector(instance, corename);
|
||||
}
|
||||
@Override
|
||||
public void run() {
|
||||
|
|
|
@ -22,145 +22,55 @@ package net.yacy.cora.federate.solr.connector;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
|
||||
import org.apache.commons.httpclient.HttpException;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HeaderElement;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.HttpRequest;
|
||||
import org.apache.http.HttpRequestInterceptor;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.HttpResponseInterceptor;
|
||||
import org.apache.http.auth.AuthScope;
|
||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||
import org.apache.http.client.AuthCache;
|
||||
import org.apache.http.client.entity.GzipDecompressingEntity;
|
||||
import org.apache.http.client.protocol.ClientContext;
|
||||
import org.apache.http.impl.auth.BasicScheme;
|
||||
import org.apache.http.impl.client.BasicAuthCache;
|
||||
import org.apache.http.impl.client.BasicCredentialsProvider;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
import org.apache.http.impl.conn.PoolingClientConnectionManager;
|
||||
import org.apache.http.protocol.HttpContext;
|
||||
import org.apache.solr.client.solrj.ResponseParser;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.impl.XMLResponseParser;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
|
||||
public class RemoteSolrConnector extends SolrServerConnector implements SolrConnector {
|
||||
|
||||
private final String solrurl, host, solrpath, solraccount, solrpw;
|
||||
private DefaultHttpClient client;
|
||||
private final int port;
|
||||
|
||||
SolrRemoteInstance instance;
|
||||
String corename;
|
||||
|
||||
/**
|
||||
* create a new solr connector
|
||||
* @param url the solr url, like http://192.168.1.60:8983/solr/ or http://admin:pw@192.168.1.60:8983/solr/
|
||||
* @param scheme
|
||||
* @param instance the instance of the remote solr url, like http://192.168.1.60:8983/solr/ or http://admin:pw@192.168.1.60:8983/solr/
|
||||
* @throws IOException
|
||||
*/
|
||||
public RemoteSolrConnector(final String url) throws IOException {
|
||||
public RemoteSolrConnector(final SolrRemoteInstance instance) throws IOException {
|
||||
super();
|
||||
this.solrurl = url;
|
||||
|
||||
// connect using authentication
|
||||
final MultiProtocolURI u = new MultiProtocolURI(this.solrurl);
|
||||
this.host = u.getHost();
|
||||
this.port = u.getPort();
|
||||
this.solrpath = u.getPath();
|
||||
final String userinfo = u.getUserInfo();
|
||||
if (userinfo == null || userinfo.isEmpty()) {
|
||||
this.solraccount = ""; this.solrpw = "";
|
||||
} else {
|
||||
final int p = userinfo.indexOf(':');
|
||||
if (p < 0) {
|
||||
this.solraccount = userinfo; this.solrpw = "";
|
||||
} else {
|
||||
this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1);
|
||||
}
|
||||
}
|
||||
HttpSolrServer s;
|
||||
if (this.solraccount.length() > 0) {
|
||||
PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); // try also: ThreadSafeClientConnManager
|
||||
cm.setMaxTotal(100);
|
||||
|
||||
this.client = new DefaultHttpClient(cm) {
|
||||
@Override
|
||||
protected HttpContext createHttpContext() {
|
||||
HttpContext context = super.createHttpContext();
|
||||
AuthCache authCache = new BasicAuthCache();
|
||||
BasicScheme basicAuth = new BasicScheme();
|
||||
HttpHost targetHost = new HttpHost(u.getHost(), u.getPort(), u.getProtocol());
|
||||
authCache.put(targetHost, basicAuth);
|
||||
context.setAttribute(ClientContext.AUTH_CACHE, authCache);
|
||||
return context;
|
||||
}
|
||||
};
|
||||
this.client.addRequestInterceptor(new HttpRequestInterceptor() {
|
||||
@Override
|
||||
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
|
||||
if (!request.containsHeader("Accept-Encoding")) request.addHeader("Accept-Encoding", "gzip");
|
||||
}
|
||||
|
||||
});
|
||||
this.client.addResponseInterceptor(new HttpResponseInterceptor() {
|
||||
@Override
|
||||
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
|
||||
HttpEntity entity = response.getEntity();
|
||||
if (entity != null) {
|
||||
Header ceheader = entity.getContentEncoding();
|
||||
if (ceheader != null) {
|
||||
HeaderElement[] codecs = ceheader.getElements();
|
||||
for (HeaderElement codec : codecs) {
|
||||
if (codec.getName().equalsIgnoreCase("gzip")) {
|
||||
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
|
||||
credsProvider.setCredentials(new AuthScope(this.host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(this.solraccount, this.solrpw));
|
||||
this.client.setCredentialsProvider(credsProvider);
|
||||
String p = "http://" + this.host + ":" + this.port + this.solrpath;
|
||||
Log.logInfo("RemoteSolrConnector", "connecting Solr authenticated with url:" + p);
|
||||
s = new HttpSolrServer(p, this.client);
|
||||
} else {
|
||||
Log.logInfo("RemoteSolrConnector", "connecting Solr with url:" + this.solrurl);
|
||||
s = new HttpSolrServer(this.solrurl);
|
||||
}
|
||||
s.setAllowCompression(true);
|
||||
s.setConnectionTimeout(60000);
|
||||
s.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated)
|
||||
s.setSoTimeout(60000);
|
||||
this.instance = instance;
|
||||
this.corename = this.instance.getDefaultCoreName();
|
||||
SolrServer s = instance.getServer(this.corename);
|
||||
super.init(s);
|
||||
}
|
||||
|
||||
/**
 * Creates a connector to one core of a remote solr instance.
 *
 * @param instance the remote solr instance
 * @param corename the name of the core; if null, the default core name of the instance is used
 */
public RemoteSolrConnector(final SolrRemoteInstance instance, String corename) {
    super();
    this.instance = instance;
    if (corename == null) {
        this.corename = instance.getDefaultCoreName();
    } else {
        this.corename = corename;
    }
    super.init(instance.getServer(this.corename));
}
|
||||
|
||||
public void terminate() {
|
||||
if (this.client != null) this.client.getConnectionManager().shutdown();
|
||||
public SolrInstance getInstance() {
|
||||
return this.instance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
super.close();
|
||||
this.terminate();
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResponse query(ModifiableSolrParams params) throws IOException {
|
||||
try {
|
||||
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
|
||||
String q = params.get("q");
|
||||
String threadname = Thread.currentThread().getName();
|
||||
|
@ -170,32 +80,30 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
|
|||
ResponseParser responseParser = new XMLResponseParser();
|
||||
request.setResponseParser(responseParser);
|
||||
long t = System.currentTimeMillis();
|
||||
NamedList<Object> result = server.request(request);
|
||||
NamedList<Object> result;
|
||||
try {
|
||||
result = server.request(request);
|
||||
} catch (Throwable e) {
|
||||
server = instance.getServer(this.corename);
|
||||
super.init(server);
|
||||
try {
|
||||
result = server.request(request);
|
||||
} catch (Throwable e1) {
|
||||
throw new IOException(e1.getMessage());
|
||||
}
|
||||
}
|
||||
QueryResponse response = new QueryResponse(result, server);
|
||||
response.setElapsedTime(System.currentTimeMillis() - t);
|
||||
|
||||
if (q != null) Thread.currentThread().setName(threadname);
|
||||
return response;
|
||||
} catch (Throwable e) {
|
||||
throw new IOException("Error executing query", e);
|
||||
}
|
||||
}
|
||||
|
||||
public String getAdminInterface() {
|
||||
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
|
||||
final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress();
|
||||
String u = this.solrurl;
|
||||
int p = u.indexOf("localhost",0);
|
||||
if (p < 0) p = u.indexOf("127.0.0.1",0);
|
||||
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
|
||||
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
|
||||
return u + (u.endsWith("/") ? "admin/" : "/admin/");
|
||||
}
|
||||
|
||||
public static void main(final String args[]) {
|
||||
RemoteSolrConnector solr;
|
||||
try {
|
||||
solr = new RemoteSolrConnector("http://127.0.0.1:8983/solr");
|
||||
SolrRemoteInstance instance = new SolrRemoteInstance("http://127.0.0.1:8983/solr/");
|
||||
solr = new RemoteSolrConnector(instance, "solr");
|
||||
solr.clear();
|
||||
final File exampleDir = new File("test/parsertest/");
|
||||
long t, t0, a = 0;
|
||||
|
@ -211,8 +119,10 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
|
|||
System.out.println("pushed file " + s + " to solr, " + t0 + " milliseconds");
|
||||
}
|
||||
System.out.println("pushed " + c + " files in " + a + " milliseconds, " + (a / c) + " milliseconds average; " + (60000 / a * c) + " PPM");
|
||||
solr.commit(false);
|
||||
} catch (final IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@ package net.yacy.cora.federate.solr.connector;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
@ -37,7 +39,7 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
|
|||
this.solrConnector = solrConnector;
|
||||
this.retryMaxTime = retryMaxTime;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void commit(boolean softCommit) {
|
||||
this.solrConnector.commit(softCommit);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* SolrChardingConnector
|
||||
* ShardSolrConnector
|
||||
* Copyright 2011 by Michael Peter Christen
|
||||
* First released 25.05.2011 at http://yacy.net
|
||||
*
|
||||
|
@ -21,7 +21,6 @@
|
|||
package net.yacy.cora.federate.solr.connector;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
@ -30,34 +29,69 @@ import java.util.Map;
|
|||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
|
||||
import org.apache.solr.client.solrj.response.FacetField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
||||
public class ShardSolrConnector extends AbstractSolrConnector implements SolrConnector {
|
||||
|
||||
private final List<SolrConnector> connectors;
|
||||
private final ArrayList<SolrRemoteInstance> instances;
|
||||
private final ArrayList<SolrConnector> connectors;
|
||||
private final ShardSelection sharding;
|
||||
private final String[] urls;
|
||||
private final String[] adminInterfaces;
|
||||
|
||||
public ShardSolrConnector(final String urlList, final ShardSelection.Method method, final long timeout, boolean multipleConnections) throws IOException {
|
||||
urlList.replace(' ', ',');
|
||||
this.urls = urlList.split(",");
|
||||
public ShardSolrConnector(
|
||||
ArrayList<SolrRemoteInstance> instances,
|
||||
final ShardSelection.Method method, boolean multipleConnections) {
|
||||
this.instances = instances;
|
||||
this.connectors = new ArrayList<SolrConnector>();
|
||||
SolrConnector s;
|
||||
for (final String u: this.urls) {
|
||||
s = multipleConnections ? new MultipleSolrConnector(u.trim(), 2) : new RemoteSolrConnector(u.trim());
|
||||
this.connectors.add(new RetrySolrConnector(s, timeout));
|
||||
this.adminInterfaces = new String[instances.size()];
|
||||
int c = 0;
|
||||
String defaultCoreName = instances.get(0).getDefaultCoreName();
|
||||
for (final SolrRemoteInstance instance: instances) {
|
||||
adminInterfaces[c++] = instance.getAdminInterface();
|
||||
s = multipleConnections ? new MultipleSolrConnector(instance, defaultCoreName, 2) : new RemoteSolrConnector(instance, defaultCoreName);
|
||||
this.connectors.add(s /*new RetrySolrConnector(s, timeout)*/);
|
||||
}
|
||||
this.sharding = new ShardSelection(method, this.urls.length);
|
||||
this.sharding = new ShardSelection(method, this.connectors.size());
|
||||
}
|
||||
|
||||
public static ArrayList<SolrRemoteInstance> getShardInstances(final String urlList) throws IOException {
|
||||
urlList.replace(' ', ',');
|
||||
String[] urls = urlList.split(",");
|
||||
ArrayList<SolrRemoteInstance> instances = new ArrayList<SolrRemoteInstance>();
|
||||
for (final String u: urls) {
|
||||
SolrRemoteInstance instance = new SolrRemoteInstance(u);
|
||||
instances.add(instance);
|
||||
}
|
||||
return instances;
|
||||
}
|
||||
|
||||
public static ArrayList<SolrRemoteInstance> getShardInstances(final String urlList, Collection<String> coreNames, String defaultCoreName) throws IOException {
|
||||
urlList.replace(' ', ',');
|
||||
String[] urls = urlList.split(",");
|
||||
ArrayList<SolrRemoteInstance> instances = new ArrayList<SolrRemoteInstance>();
|
||||
for (final String u: urls) {
|
||||
SolrRemoteInstance instance = new SolrRemoteInstance(u, coreNames, defaultCoreName);
|
||||
instances.add(instance);
|
||||
}
|
||||
return instances;
|
||||
}
|
||||
|
||||
public ArrayList<SolrRemoteInstance> getInstances() {
|
||||
return this.instances;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean softCommit) {
|
||||
for (final SolrConnector connector: this.connectors) connector.commit(softCommit);
|
||||
|
@ -67,6 +101,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
|
|||
* force an explicit merge of segments
|
||||
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
|
||||
*/
|
||||
@Override
|
||||
public void optimize(int maxSegments) {
|
||||
for (final SolrConnector connector: this.connectors) connector.optimize(maxSegments);
|
||||
}
|
||||
|
@ -185,11 +220,70 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
|
|||
|
||||
@Override
|
||||
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
|
||||
|
||||
final SimpleOrderedMap<Object> facet_countsAcc = new SimpleOrderedMap<Object>();
|
||||
final SimpleOrderedMap<Object> highlightingAcc = new SimpleOrderedMap<Object>();
|
||||
final SimpleOrderedMap<Object> headerAcc = new SimpleOrderedMap<Object>();
|
||||
final SolrDocumentList resultsAcc = new SolrDocumentList();
|
||||
|
||||
// concurrently call all shards
|
||||
List<Thread> t = new ArrayList<Thread>();
|
||||
for (final SolrConnector connector: this.connectors) {
|
||||
QueryResponse rsp = connector.query(query);
|
||||
if (rsp != null && rsp.getResults().size() > 0) return rsp;
|
||||
Thread t0 = new Thread() {
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void run() {
|
||||
QueryResponse rsp;
|
||||
try {
|
||||
rsp = connector.query(query);
|
||||
} catch (Throwable e) {return;}
|
||||
NamedList<Object> response = rsp.getResponse();
|
||||
|
||||
// set the header; this is mostly always the same (well this is not evaluated much)
|
||||
SimpleOrderedMap<Object> header = (SimpleOrderedMap<Object>) response.get("responseHeader");
|
||||
//Integer status = (Integer) header.get("status");
|
||||
//Integer QTime = (Integer) header.get("QTime");
|
||||
//SimpleOrderedMap<Object> params = (SimpleOrderedMap<Object>) header.get("params");
|
||||
if (headerAcc.size() == 0) {
|
||||
for (Map.Entry<String, Object> e: header) headerAcc.add(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
// accumulate the results
|
||||
SolrDocumentList results = (SolrDocumentList) response.get("response");
|
||||
long found = results.size();
|
||||
for (int i = 0; i < found; i++) resultsAcc.add(results.get(i));
|
||||
resultsAcc.setNumFound(resultsAcc.getNumFound() + results.getNumFound());
|
||||
resultsAcc.setMaxScore(Math.max(resultsAcc.getMaxScore() == null ? 0f : resultsAcc.getMaxScore().floatValue(), results.getMaxScore() == null ? 0f : results.getMaxScore().floatValue()));
|
||||
|
||||
// accumulate the highlighting
|
||||
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) response.get("highlighting");
|
||||
if (highlighting != null) {
|
||||
for (Map.Entry<String, Object> e: highlighting) highlightingAcc.add(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
// accumulate the facets (well this is not correct at this time...)
|
||||
SimpleOrderedMap<Object> facet_counts = (SimpleOrderedMap<Object>) response.get("facet_counts");
|
||||
if (facet_counts != null) {
|
||||
for (Map.Entry<String, Object> e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue());
|
||||
}
|
||||
}
|
||||
};
|
||||
t0.start();
|
||||
t.add(t0);
|
||||
}
|
||||
return new QueryResponse();
|
||||
for (Thread t0: t) {
|
||||
try {t0.join();} catch (InterruptedException e) {}
|
||||
}
|
||||
|
||||
// prepare combined response
|
||||
QueryResponse rspAcc = new QueryResponse();
|
||||
NamedList<Object> nl = new NamedList<Object>();
|
||||
nl.add("responseHeader", headerAcc);
|
||||
nl.add("response", resultsAcc);
|
||||
if (highlightingAcc != null && highlightingAcc.size() > 0) nl.add("highlighting", highlightingAcc);
|
||||
if (facet_countsAcc != null && facet_countsAcc.size() > 0) nl.add("facet_counts", facet_countsAcc);
|
||||
rspAcc.setResponse(nl);
|
||||
return rspAcc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -250,18 +344,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
|
|||
}
|
||||
|
||||
public String[] getAdminInterfaceList() {
|
||||
final String[] urlAdmin = new String[this.connectors.size()];
|
||||
int i = 0;
|
||||
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
|
||||
final String localhostExtern = localhostExternAddress == null ? Domains.LOCALHOST : localhostExternAddress.getHostAddress();
|
||||
for (String u: this.urls) {
|
||||
int p = u.indexOf("localhost",0);
|
||||
if (p < 0) p = u.indexOf("127.0.0.1",0);
|
||||
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
|
||||
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
|
||||
urlAdmin[i++] = u + (u.endsWith("/") ? "admin/" : "/admin/");
|
||||
}
|
||||
return urlAdmin;
|
||||
return this.adminInterfaces;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
import net.yacy.cora.federate.solr.instance.SolrInstance;
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
|
@ -35,7 +36,7 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {
|
||||
|
||||
|
||||
/**
|
||||
* force a commit
|
||||
*/
|
||||
|
|
|
@ -28,28 +28,15 @@ import net.yacy.cora.federate.solr.YaCySchema;
|
|||
import net.yacy.kelondro.logging.Log;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.QueryComponent;
|
||||
import org.apache.solr.handler.component.ResponseBuilder;
|
||||
import org.apache.solr.handler.component.SearchComponent;
|
||||
import org.apache.solr.handler.component.SearchHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.servlet.SolrRequestParsers;
|
||||
|
||||
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
|
||||
|
||||
|
@ -74,9 +61,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
|||
//if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).getHttpClient().getConnectionManager().closeExpiredConnections();
|
||||
try {
|
||||
this.server.commit(true, true, softCommit);
|
||||
if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).shutdown();
|
||||
//if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).shutdown();
|
||||
} catch (Throwable e) {
|
||||
Log.logException(e);
|
||||
//Log.logException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,67 +82,15 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
|
|||
@Override
|
||||
public synchronized void close() {
|
||||
try {
|
||||
if (this.server != null) synchronized (this.server) {this.server.commit(true, true, false);}
|
||||
if (this.server != null && this.server instanceof EmbeddedSolrServer) synchronized (this.server) {this.server.commit(true, true, false);}
|
||||
this.server = null;
|
||||
} catch (Throwable e) {
|
||||
Log.logException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static final SolrRequestParsers _parser = new SolrRequestParsers(null);
|
||||
|
||||
@Override
|
||||
public long getSize() {
|
||||
String threadname = Thread.currentThread().getName();
|
||||
Thread.currentThread().setName("solr query: size");
|
||||
if (this.server instanceof EmbeddedSolrServer) {
|
||||
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
|
||||
CoreContainer coreContainer = ess.getCoreContainer();
|
||||
String coreName = coreContainer.getDefaultCoreName();
|
||||
SolrCore core = coreContainer.getCore(coreName);
|
||||
if (core == null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "No such core: " + coreName);
|
||||
|
||||
try {
|
||||
SolrParams params = AbstractSolrConnector.catchSuccessQuery;
|
||||
QueryRequest request = new QueryRequest(AbstractSolrConnector.catchSuccessQuery);
|
||||
SolrQueryRequest req = _parser.buildRequestFrom(core, params, request.getContentStreams());
|
||||
String path = "/select";
|
||||
req.getContext().put("path", path);
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
|
||||
SolrRequestHandler handler = core.getRequestHandler(path);
|
||||
SearchHandler sh = (SearchHandler) handler;
|
||||
List<SearchComponent> components = sh.getComponents();
|
||||
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
|
||||
QueryComponent qc = (QueryComponent) components.get(0);
|
||||
qc.prepare(rb);
|
||||
qc.process(rb);
|
||||
qc.finishStage(rb);
|
||||
int hits = rb.getResults().docList.matches();
|
||||
if (req != null) req.close();
|
||||
core.close();
|
||||
SolrRequestInfo.clearRequestInfo();
|
||||
Thread.currentThread().setName(threadname);
|
||||
return hits;
|
||||
} catch (final Throwable e) {
|
||||
log.warn(e);
|
||||
Thread.currentThread().setName(threadname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
Thread.currentThread().setName(threadname);
|
||||
return getSize0();
|
||||
}
|
||||
|
||||
public long getSize0() {
|
||||
/*
|
||||
if (this.server instanceof EmbeddedSolrServer) {
|
||||
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
|
||||
CoreContainer coreContainer = ess.getCoreContainer();
|
||||
String coreName = coreContainer.getDefaultCoreName();
|
||||
SolrCore core = coreContainer.getCore(coreName);
|
||||
}
|
||||
*/
|
||||
try {
|
||||
final QueryResponse rsp = query(AbstractSolrConnector.catchSuccessQuery);
|
||||
if (rsp == null) return 0;
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* SolrEmbeddedInstance
|
||||
* Copyright 2013 by Michael Peter Christen
|
||||
* First released 13.02.2013 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.instance;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.util.MemoryControl;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
|
||||
public class SolrEmbeddedInstance implements SolrInstance {
|
||||
|
||||
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
|
||||
|
||||
private CoreContainer cores;
|
||||
private String defaultCoreName;
|
||||
private SolrCore defaultCore;
|
||||
private SolrServer defaultServer;
|
||||
private File storagePath;
|
||||
|
||||
public SolrEmbeddedInstance(final File corePath, final File solr_config) throws IOException {
|
||||
super();
|
||||
// copy the solrconfig.xml to the storage path
|
||||
this.storagePath = corePath;
|
||||
File conf = new File(corePath, "conf");
|
||||
conf.mkdirs();
|
||||
File source, target;
|
||||
for (String cf: confFiles) {
|
||||
source = new File(solr_config, cf);
|
||||
if (source.isDirectory()) {
|
||||
target = new File(conf, cf);
|
||||
target.mkdirs();
|
||||
for (String cfl: source.list()) {
|
||||
try {
|
||||
Files.copy(new File(source, cfl), new File(target, cfl));
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
target = new File(conf, cf);
|
||||
target.getParentFile().mkdirs();
|
||||
try {
|
||||
Files.copy(source, target);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
String dir = corePath.getAbsolutePath();
|
||||
File configFile = new File(solr_config, "solr.xml");
|
||||
this.cores = new CoreContainer(dir, configFile); // this may take indefinitely long if solr files are broken
|
||||
if (this.cores == null) throw new IOException("cannot create core container dir = " + dir + ", configFile = " + configFile);
|
||||
this.defaultCoreName = this.cores.getDefaultCoreName();
|
||||
Log.logInfo("SolrEmbeddedInstance", "detected default solr core: " + this.defaultCoreName);
|
||||
this.defaultCore = this.cores.getCore(this.defaultCoreName); // should be "collection1"
|
||||
if (this.defaultCore == null) {
|
||||
// try again
|
||||
Collection<SolrCore> cores = this.cores.getCores();
|
||||
if (cores.size() > 0) {
|
||||
this.defaultCore = cores.iterator().next();
|
||||
this.defaultCoreName = this.defaultCore.getName();
|
||||
}
|
||||
}
|
||||
if (this.defaultCore == null) {
|
||||
throw new IOException("cannot get the default core; available = " + MemoryControl.available() + ", free = " + MemoryControl.free());
|
||||
}
|
||||
this.defaultServer = new EmbeddedSolrServer(this.cores, this.defaultCoreName);
|
||||
}
|
||||
|
||||
|
||||
public File getStoragePath() {
|
||||
return this.storagePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDefaultCoreName() {
|
||||
return this.defaultCoreName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getCoreNames() {
|
||||
return this.cores.getCoreNames();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrServer getDefaultServer() {
|
||||
return this.defaultServer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrServer getServer(String name) {
|
||||
return new EmbeddedSolrServer(this.cores, name);
|
||||
}
|
||||
|
||||
public SolrCore getDefaultCore() {
|
||||
return this.defaultCore;
|
||||
}
|
||||
|
||||
public SolrCore getCore(String name) {
|
||||
return this.cores.getCore(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* SolrInstance
|
||||
* Copyright 2013 by Michael Peter Christen
|
||||
* First released 13.02.2013 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.instance;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
|
||||
public interface SolrInstance {
|
||||
|
||||
public String getDefaultCoreName();
|
||||
|
||||
public Collection<String> getCoreNames();
|
||||
|
||||
public SolrServer getDefaultServer();
|
||||
|
||||
public SolrServer getServer(String name);
|
||||
|
||||
public void close();
|
||||
}
|
|
@ -0,0 +1,226 @@
|
|||
/**
|
||||
* SolrRemoteInstance
|
||||
* Copyright 2013 by Michael Peter Christen
|
||||
* First released 13.02.2013 at http://yacy.net
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.federate.solr.instance;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.Domains;
|
||||
import net.yacy.kelondro.logging.Log;
|
||||
|
||||
import org.apache.commons.httpclient.HttpException;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HeaderElement;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.HttpRequest;
|
||||
import org.apache.http.HttpRequestInterceptor;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.HttpResponseInterceptor;
|
||||
import org.apache.http.auth.AuthScope;
|
||||
import org.apache.http.auth.UsernamePasswordCredentials;
|
||||
import org.apache.http.client.AuthCache;
|
||||
import org.apache.http.client.entity.GzipDecompressingEntity;
|
||||
import org.apache.http.client.protocol.ClientContext;
|
||||
import org.apache.http.impl.auth.BasicScheme;
|
||||
import org.apache.http.impl.client.BasicAuthCache;
|
||||
import org.apache.http.impl.client.BasicCredentialsProvider;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
import org.apache.http.impl.conn.PoolingClientConnectionManager;
|
||||
import org.apache.http.protocol.HttpContext;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
|
||||
public class SolrRemoteInstance implements SolrInstance {
|
||||
|
||||
private String solrurl;
|
||||
private DefaultHttpClient client;
|
||||
|
||||
private String defaultCoreName;
|
||||
private HttpSolrServer defaultServer;
|
||||
private Collection<String> coreNames;
|
||||
|
||||
public SolrRemoteInstance(final String url) throws IOException {
|
||||
this(url, null, url.endsWith("solr/") || url.endsWith("solr") ? "solr" : "shard0");
|
||||
}
|
||||
|
||||
public SolrRemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName) throws IOException {
|
||||
this.solrurl = url;
|
||||
if (this.solrurl == null) this.solrurl = "http://127.0.0.1:8983/solr/"; // that should work for the example configuration of solr 4.x.x
|
||||
this.coreNames = coreNames == null ? new ArrayList<String>() : coreNames;
|
||||
this.defaultCoreName = defaultCoreName;
|
||||
if (this.defaultCoreName == null) this.defaultCoreName = "shard0";
|
||||
if (!this.coreNames.contains(this.defaultCoreName)) this.coreNames.add(this.defaultCoreName);
|
||||
|
||||
// check the url
|
||||
if (this.solrurl.endsWith("/")) {
|
||||
// this could mean that we have a path without a core name (correct)
|
||||
// or that the core name is appended and contains a badly '/' at the end (must be corrected)
|
||||
if (this.solrurl.endsWith(this.defaultCoreName + "/")) {
|
||||
this.solrurl = this.solrurl.substring(0, this.solrurl.length() - this.defaultCoreName.length() - 1);
|
||||
}
|
||||
} else {
|
||||
// this could mean that we have an url which ends with the core name (must be corrected)
|
||||
// or that the url has a mising '/' (must be corrected)
|
||||
if (this.solrurl.endsWith(this.defaultCoreName)) {
|
||||
this.solrurl = this.solrurl.substring(0, this.solrurl.length() - this.defaultCoreName.length());
|
||||
} else {
|
||||
this.solrurl = this.solrurl + "/";
|
||||
}
|
||||
}
|
||||
|
||||
// Make a http client, connect using authentication. An url like
|
||||
// http://127.0.0.1:8983/solr/shard0
|
||||
// is proper, and contains the core name as last element in the path
|
||||
final MultiProtocolURI u;
|
||||
try {
|
||||
u = new MultiProtocolURI(this.solrurl + this.defaultCoreName);
|
||||
} catch (MalformedURLException e) {
|
||||
throw new IOException(e.getMessage());
|
||||
}
|
||||
String solraccount, solrpw;
|
||||
String host = u.getHost();
|
||||
final String userinfo = u.getUserInfo();
|
||||
if (userinfo == null || userinfo.isEmpty()) {
|
||||
solraccount = ""; solrpw = "";
|
||||
} else {
|
||||
final int p = userinfo.indexOf(':');
|
||||
if (p < 0) {
|
||||
solraccount = userinfo; solrpw = "";
|
||||
} else {
|
||||
solraccount = userinfo.substring(0, p); solrpw = userinfo.substring(p + 1);
|
||||
}
|
||||
}
|
||||
if (solraccount.length() > 0) {
|
||||
PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); // try also: ThreadSafeClientConnManager
|
||||
cm.setMaxTotal(100);
|
||||
|
||||
this.client = new DefaultHttpClient(cm) {
|
||||
@Override
|
||||
protected HttpContext createHttpContext() {
|
||||
HttpContext context = super.createHttpContext();
|
||||
AuthCache authCache = new BasicAuthCache();
|
||||
BasicScheme basicAuth = new BasicScheme();
|
||||
HttpHost targetHost = new HttpHost(u.getHost(), u.getPort(), u.getProtocol());
|
||||
authCache.put(targetHost, basicAuth);
|
||||
context.setAttribute(ClientContext.AUTH_CACHE, authCache);
|
||||
return context;
|
||||
}
|
||||
};
|
||||
this.client.addRequestInterceptor(new HttpRequestInterceptor() {
|
||||
@Override
|
||||
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
|
||||
if (!request.containsHeader("Accept-Encoding")) request.addHeader("Accept-Encoding", "gzip");
|
||||
}
|
||||
|
||||
});
|
||||
this.client.addResponseInterceptor(new HttpResponseInterceptor() {
|
||||
@Override
|
||||
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
|
||||
HttpEntity entity = response.getEntity();
|
||||
if (entity != null) {
|
||||
Header ceheader = entity.getContentEncoding();
|
||||
if (ceheader != null) {
|
||||
HeaderElement[] codecs = ceheader.getElements();
|
||||
for (HeaderElement codec : codecs) {
|
||||
if (codec.getName().equalsIgnoreCase("gzip")) {
|
||||
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
|
||||
credsProvider.setCredentials(new AuthScope(host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(solraccount, solrpw));
|
||||
this.client.setCredentialsProvider(credsProvider);
|
||||
} else {
|
||||
this.client = null;
|
||||
}
|
||||
|
||||
this.defaultServer = (HttpSolrServer) getServer(this.defaultCoreName);
|
||||
if (this.defaultServer == null) throw new IOException("cannot connect to url " + url + " and connect core " + defaultCoreName);
|
||||
}
|
||||
|
||||
public String getAdminInterface() {
|
||||
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
|
||||
final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress();
|
||||
String u = this.solrurl;
|
||||
int p = u.indexOf("localhost",0);
|
||||
if (p < 0) p = u.indexOf("127.0.0.1",0);
|
||||
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
|
||||
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
|
||||
return u + (u.endsWith("/") ? "admin/" : "/admin/");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDefaultCoreName() {
|
||||
return this.defaultCoreName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getCoreNames() {
|
||||
return this.coreNames;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrServer getDefaultServer() {
|
||||
return this.defaultServer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SolrServer getServer(String name) {
|
||||
HttpSolrServer server;
|
||||
if (this.client != null) {
|
||||
final MultiProtocolURI u;
|
||||
try {
|
||||
u = new MultiProtocolURI(this.solrurl + name);
|
||||
} catch (MalformedURLException e) {
|
||||
return null;
|
||||
}
|
||||
String host = u.getHost();
|
||||
int port = u.getPort();
|
||||
String solrpath = u.getPath();
|
||||
String p = "http://" + host + ":" + port + solrpath;
|
||||
Log.logInfo("RemoteSolrConnector", "connecting Solr authenticated with url:" + p);
|
||||
server = new HttpSolrServer(p, client);
|
||||
} else {
|
||||
Log.logInfo("RemoteSolrConnector", "connecting Solr with url:" + this.solrurl + name);
|
||||
server = new HttpSolrServer(this.solrurl + name);
|
||||
}
|
||||
server.setAllowCompression(true);
|
||||
server.setConnectionTimeout(60000);
|
||||
server.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated)
|
||||
server.setSoTimeout(60000);
|
||||
return server;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (this.client != null) this.client.getConnectionManager().shutdown();
|
||||
}
|
||||
|
||||
}
|
|
@ -84,8 +84,8 @@ public class CrawlQueues {
|
|||
this.log.logConfig("Starting Crawling Management");
|
||||
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
|
||||
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
|
||||
this.errorURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
|
||||
this.delegatedURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
|
||||
this.errorURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrSchema(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
|
||||
this.delegatedURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrSchema(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
|
||||
}
|
||||
|
||||
public void relocate(final File newQueuePath) {
|
||||
|
@ -96,8 +96,8 @@ public class CrawlQueues {
|
|||
|
||||
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
|
||||
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
|
||||
this.errorURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
|
||||
this.delegatedURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
|
||||
this.errorURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrSchema(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
|
||||
this.delegatedURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrSchema(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
|
||||
}
|
||||
|
||||
public synchronized void close() {
|
||||
|
|
|
@ -32,7 +32,6 @@ package net.yacy.data;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
|
|
@ -72,6 +72,7 @@ import net.yacy.cora.federate.opensearch.SRURSSConnector;
|
|||
import net.yacy.cora.federate.solr.YaCySchema;
|
||||
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||
import net.yacy.cora.order.Base64Order;
|
||||
import net.yacy.cora.order.Digest;
|
||||
|
@ -105,7 +106,6 @@ import net.yacy.search.EventTracker;
|
|||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import net.yacy.search.index.Segment;
|
||||
import net.yacy.search.query.QueryModifier;
|
||||
import net.yacy.search.query.SearchEvent;
|
||||
import net.yacy.search.query.SecondarySearchSuperviser;
|
||||
import net.yacy.search.snippet.TextSnippet;
|
||||
|
@ -1059,9 +1059,12 @@ public final class Protocol {
|
|||
} else {
|
||||
final String solrURL = "http://" + target.getPublicAddress() + "/solr";
|
||||
try {
|
||||
SolrConnector solrConnector = new RemoteSolrConnector(solrURL);
|
||||
SolrRemoteInstance instance = new SolrRemoteInstance(solrURL);
|
||||
SolrConnector solrConnector = new RemoteSolrConnector(instance, "solr");
|
||||
rsp = solrConnector.query(solrQuery);
|
||||
docList = rsp.getResults();
|
||||
solrConnector.close();
|
||||
instance.close();
|
||||
// no need to close this here because that sends a commit to remote solr which is not wanted here
|
||||
} catch (Throwable e) {
|
||||
Network.log.logInfo("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")", e);
|
||||
|
@ -1148,7 +1151,7 @@ public final class Protocol {
|
|||
// passed all checks, store url
|
||||
if (!localsearch) {
|
||||
try {
|
||||
event.query.getSegment().fulltext().putDocument(YaCySchema.toSolrInputDocument(doc));
|
||||
event.query.getSegment().fulltext().putDocument(event.query.getSegment().fulltext().getSolrSchema().toSolrInputDocument(doc));
|
||||
ResultURLs.stack(
|
||||
ASCII.String(urlEntry.url().hash()),
|
||||
urlEntry.url().getHost(),
|
||||
|
|
|
@ -103,6 +103,7 @@ import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
|
|||
import net.yacy.cora.federate.solr.connector.ShardSelection;
|
||||
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
|
||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||
import net.yacy.cora.federate.yacy.ConfigurationSet;
|
||||
import net.yacy.cora.lod.JenaTripleStore;
|
||||
|
@ -413,9 +414,9 @@ public final class Switchboard extends serverSwitch {
|
|||
for (YaCySchema field: new YaCySchema[]{
|
||||
YaCySchema.host_s, YaCySchema.load_date_dt,
|
||||
YaCySchema.url_file_ext_s, YaCySchema.last_modified, // needed for media search and /date operator
|
||||
YaCySchema.url_paths_sxt, YaCySchema.host_organization_s, // needed to search in the url
|
||||
YaCySchema.inboundlinks_protocol_sxt, YaCySchema.inboundlinks_urlstub_txt, // needed for HostBrowser
|
||||
YaCySchema.outboundlinks_protocol_sxt, YaCySchema.outboundlinks_urlstub_txt // needed to enhance the crawler
|
||||
/*YaCySchema.url_paths_sxt,*/ YaCySchema.host_organization_s, // needed to search in the url
|
||||
/*YaCySchema.inboundlinks_protocol_sxt,*/ YaCySchema.inboundlinks_urlstub_txt, // needed for HostBrowser
|
||||
/*YaCySchema.outboundlinks_protocol_sxt,*/ YaCySchema.outboundlinks_urlstub_txt // needed to enhance the crawler
|
||||
}) {
|
||||
ConfigurationSet.Entry entry = solrScheme.get(field.name()); entry.setEnable(true); solrScheme.put(field.name(), entry);
|
||||
}
|
||||
|
@ -441,10 +442,8 @@ public final class Switchboard extends serverSwitch {
|
|||
|
||||
if (usesolr && solrurls != null && solrurls.length() > 0) {
|
||||
try {
|
||||
SolrConnector solr = new ShardSolrConnector(
|
||||
solrurls,
|
||||
ShardSelection.Method.MODULO_HOST_MD5,
|
||||
10000, true);
|
||||
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
|
||||
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
|
||||
this.index.fulltext().connectRemoteSolr(solr);
|
||||
} catch ( final IOException e ) {
|
||||
Log.logException(e);
|
||||
|
@ -1231,7 +1230,7 @@ public final class Switchboard extends serverSwitch {
|
|||
synchronized ( this ) {
|
||||
|
||||
// remember the solr scheme
|
||||
SolrConfiguration solrScheme = this.index.fulltext().getSolrScheme();
|
||||
SolrConfiguration solrScheme = this.index.fulltext().getSolrSchema();
|
||||
|
||||
// shut down
|
||||
this.crawler.close();
|
||||
|
@ -1293,10 +1292,8 @@ public final class Switchboard extends serverSwitch {
|
|||
|
||||
if (usesolr && solrurls != null && solrurls.length() > 0) {
|
||||
try {
|
||||
SolrConnector solr = new ShardSolrConnector(
|
||||
solrurls,
|
||||
ShardSelection.Method.MODULO_HOST_MD5,
|
||||
10000, true);
|
||||
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
|
||||
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
|
||||
this.index.fulltext().connectRemoteSolr(solr);
|
||||
} catch ( final IOException e ) {
|
||||
Log.logException(e);
|
||||
|
@ -2220,7 +2217,7 @@ public final class Switchboard extends serverSwitch {
|
|||
|
||||
// if no crawl is running and processing is activated:
|
||||
// execute the (post-) processing steps for all entries that have a process tag assigned
|
||||
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) {
|
||||
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrSchema().contains(YaCySchema.process_sxt)) {
|
||||
// that means we must search for those entries.
|
||||
index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found
|
||||
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
|
||||
|
@ -2240,7 +2237,7 @@ public final class Switchboard extends serverSwitch {
|
|||
// switch over tag types
|
||||
if (tagtype == ProcessType.CLICKDEPTH) {
|
||||
//proctags.remove(tag);
|
||||
if (index.fulltext().getSolrScheme().contains(YaCySchema.clickdepth_i)) {
|
||||
if (index.fulltext().getSolrSchema().contains(YaCySchema.clickdepth_i)) {
|
||||
DigestURI url;
|
||||
try {
|
||||
// get new click depth and compare with old
|
||||
|
@ -2248,11 +2245,11 @@ public final class Switchboard extends serverSwitch {
|
|||
url = new DigestURI((String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
|
||||
int clickdepth = SolrConfiguration.getClickDepth(index.urlCitation(), url);
|
||||
if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++;
|
||||
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
|
||||
SolrInputDocument sid = index.fulltext().getSolrSchema().toSolrInputDocument(doc);
|
||||
sid.setField(YaCySchema.clickdepth_i.getSolrFieldName(), clickdepth);
|
||||
|
||||
// refresh the link count; it's 'cheap' to do this here
|
||||
if (index.fulltext().getSolrScheme().contains(YaCySchema.references_i)) {
|
||||
if (index.fulltext().getSolrSchema().contains(YaCySchema.references_i)) {
|
||||
Integer oldreferences = (Integer) doc.getFieldValue(YaCySchema.references_i.getSolrFieldName());
|
||||
int references = index.urlCitation().count(url.hash());
|
||||
if (references > 0) {
|
||||
|
|
|
@ -61,9 +61,9 @@ public class DocumentIndex extends Segment {
|
|||
|
||||
static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
|
||||
|
||||
public DocumentIndex(final File segmentPath, final File schemePath, final CallbackListener callback, final int cachesize)
|
||||
public DocumentIndex(final File segmentPath, final File schemaPath, final CallbackListener callback, final int cachesize)
|
||||
throws IOException {
|
||||
super(new Log("DocumentIndex"), segmentPath, schemePath == null ? null : new SolrConfiguration(schemePath, true));
|
||||
super(new Log("DocumentIndex"), segmentPath, schemaPath == null ? null : new SolrConfiguration(schemaPath, true));
|
||||
super.connectRWI(cachesize, targetFileSize * 4 - 1);
|
||||
super.connectCitation(cachesize, targetFileSize * 4 - 1);
|
||||
super.connectUrlDb(
|
||||
|
|
|
@ -45,7 +45,9 @@ import net.yacy.cora.federate.solr.YaCySchema;
|
|||
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
|
||||
import net.yacy.cora.federate.solr.connector.SolrConnector;
|
||||
import net.yacy.cora.federate.solr.instance.SolrEmbeddedInstance;
|
||||
import net.yacy.cora.order.CloneableIterator;
|
||||
import net.yacy.cora.sorting.ReversibleScoreMap;
|
||||
import net.yacy.cora.sorting.ScoreMap;
|
||||
|
@ -82,16 +84,16 @@ public final class Fulltext {
|
|||
private String tablename;
|
||||
private ArrayList<HostStat> statsDump;
|
||||
private final MirrorSolrConnector solr;
|
||||
private final SolrConfiguration solrScheme;
|
||||
private final SolrConfiguration solrSchema;
|
||||
|
||||
protected Fulltext(final File path, final SolrConfiguration solrScheme) {
|
||||
protected Fulltext(final File path, final SolrConfiguration solrSchema) {
|
||||
this.location = path;
|
||||
this.tablename = null;
|
||||
this.urlIndexFile = null;
|
||||
this.exportthread = null; // will have a export thread assigned if exporter is running
|
||||
this.statsDump = null;
|
||||
this.solr = new MirrorSolrConnector(10000, 10000, 100);
|
||||
this.solrScheme = solrScheme;
|
||||
this.solrSchema = solrSchema;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -134,8 +136,8 @@ public final class Fulltext {
|
|||
this.urlIndexFile = null;
|
||||
}
|
||||
|
||||
public SolrConfiguration getSolrScheme() {
|
||||
return this.solrScheme;
|
||||
public SolrConfiguration getSolrSchema() {
|
||||
return this.solrSchema;
|
||||
}
|
||||
|
||||
public boolean connectedLocalSolr() {
|
||||
|
@ -151,7 +153,8 @@ public final class Fulltext {
|
|||
File oldLocation = new File(baseLocation, oldVersion);
|
||||
if (oldLocation.exists()) oldLocation.renameTo(solrLocation);
|
||||
}
|
||||
EmbeddedSolrConnector esc = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
|
||||
SolrEmbeddedInstance instance = new SolrEmbeddedInstance(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
|
||||
EmbeddedSolrConnector esc = new EmbeddedSolrConnector(instance);
|
||||
Version luceneVersion = esc.getConfig().getLuceneVersion("luceneMatchVersion");
|
||||
String lvn = luceneVersion.name();
|
||||
Log.logInfo("Fulltext", "using lucene version " + lvn);
|
||||
|
@ -169,7 +172,7 @@ public final class Fulltext {
|
|||
return this.solr.isConnected1();
|
||||
}
|
||||
|
||||
public void connectRemoteSolr(final SolrConnector rs) {
|
||||
public void connectRemoteSolr(final ShardSolrConnector rs) {
|
||||
this.solr.connect1(rs);
|
||||
}
|
||||
|
||||
|
@ -291,7 +294,7 @@ public final class Fulltext {
|
|||
final Row.Entry entry = this.urlIndexFile.remove(urlHash);
|
||||
if (entry == null) return null;
|
||||
URIMetadataRow row = new URIMetadataRow(entry, wre);
|
||||
SolrInputDocument solrInput = this.solrScheme.metadata2solr(row);
|
||||
SolrInputDocument solrInput = this.solrSchema.metadata2solr(row);
|
||||
this.putDocument(solrInput);
|
||||
return new URIMetadataNode(solrInput, wre, weight);
|
||||
} catch (final IOException e) {
|
||||
|
@ -309,7 +312,7 @@ public final class Fulltext {
|
|||
Date sdDate = (Date) this.solr.getFieldById(id, YaCySchema.last_modified.getSolrFieldName());
|
||||
Date docDate = null;
|
||||
if (sdDate == null || (docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified)) == null || sdDate.before(docDate)) {
|
||||
if (this.solrScheme.contains(YaCySchema.ip_s)) {
|
||||
if (this.solrSchema.contains(YaCySchema.ip_s)) {
|
||||
// ip_s needs a dns lookup which causes blockings during search here
|
||||
this.solr.add(doc);
|
||||
} else synchronized (this.solr) {
|
||||
|
@ -332,11 +335,11 @@ public final class Fulltext {
|
|||
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
|
||||
SolrDocument sd = this.solr.getById(id);
|
||||
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
|
||||
if (this.solrScheme.contains(YaCySchema.ip_s)) {
|
||||
if (this.solrSchema.contains(YaCySchema.ip_s)) {
|
||||
// ip_s needs a dns lookup which causes blockings during search here
|
||||
this.solr.add(getSolrScheme().metadata2solr(row));
|
||||
this.solr.add(getSolrSchema().metadata2solr(row));
|
||||
} else synchronized (this.solr) {
|
||||
this.solr.add(getSolrScheme().metadata2solr(row));
|
||||
this.solr.add(getSolrSchema().metadata2solr(row));
|
||||
}
|
||||
}
|
||||
} catch (SolrException e) {
|
||||
|
@ -544,11 +547,12 @@ public final class Fulltext {
|
|||
Log.logWarning("Fulltext", "HOT DUMP selected solr0 == NULL, no dump list!");
|
||||
return zips;
|
||||
}
|
||||
if (esc.getStoragePath() == null) {
|
||||
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
|
||||
if (sei.getStoragePath() == null) {
|
||||
Log.logWarning("Fulltext", "HOT DUMP selected solr0.getStoragePath() == NULL, no dump list!");
|
||||
return zips;
|
||||
}
|
||||
File storagePath = esc.getStoragePath().getParentFile();
|
||||
File storagePath = sei.getStoragePath().getParentFile();
|
||||
if (storagePath == null) {
|
||||
Log.logWarning("Fulltext", "HOT DUMP selected esc.getStoragePath().getParentFile() == NULL, no dump list!");
|
||||
return zips;
|
||||
|
@ -566,7 +570,8 @@ public final class Fulltext {
|
|||
*/
|
||||
public File dumpSolr() {
|
||||
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
|
||||
File storagePath = esc.getStoragePath();
|
||||
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
|
||||
File storagePath = sei.getStoragePath();
|
||||
File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
|
||||
synchronized (this.solr) {
|
||||
this.disconnectLocalSolr();
|
||||
|
@ -591,7 +596,8 @@ public final class Fulltext {
|
|||
*/
|
||||
public void restoreSolr(File solrDumpZipFile) {
|
||||
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
|
||||
File storagePath = esc.getStoragePath();
|
||||
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
|
||||
File storagePath = sei.getStoragePath();
|
||||
synchronized (this.solr) {
|
||||
this.disconnectLocalSolr();
|
||||
try {
|
||||
|
|
|
@ -112,13 +112,13 @@ public class Segment {
|
|||
protected IndexCell<WordReference> termIndex;
|
||||
protected IndexCell<CitationReference> urlCitationIndex;
|
||||
|
||||
public Segment(final Log log, final File segmentPath, final SolrConfiguration solrScheme) {
|
||||
public Segment(final Log log, final File segmentPath, final SolrConfiguration solrSchema) {
|
||||
log.logInfo("Initializing Segment '" + segmentPath + ".");
|
||||
this.log = log;
|
||||
this.segmentPath = segmentPath;
|
||||
|
||||
// create LURL-db
|
||||
this.fulltext = new Fulltext(segmentPath, solrScheme);
|
||||
this.fulltext = new Fulltext(segmentPath, solrSchema);
|
||||
}
|
||||
|
||||
public boolean connectedRWI() {
|
||||
|
@ -371,10 +371,10 @@ public class Segment {
|
|||
this.fulltext.getSolr().commit(false);
|
||||
} else {
|
||||
if (
|
||||
(this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_unique_b)) ||
|
||||
(this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_unique_b)) ||
|
||||
this.fulltext.getSolrScheme().contains(YaCySchema.title_unique_b) ||
|
||||
this.fulltext.getSolrScheme().contains(YaCySchema.description_unique_b)
|
||||
(this.fulltext.getSolrSchema().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrSchema().contains(YaCySchema.exact_signature_unique_b)) ||
|
||||
(this.fulltext.getSolrSchema().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrSchema().contains(YaCySchema.fuzzy_signature_unique_b)) ||
|
||||
this.fulltext.getSolrSchema().contains(YaCySchema.title_unique_b) ||
|
||||
this.fulltext.getSolrSchema().contains(YaCySchema.description_unique_b)
|
||||
) {
|
||||
this.fulltext.getSolr().commit(true); // make sure that we have latest information for the postprocessing steps
|
||||
}
|
||||
|
@ -395,7 +395,7 @@ public class Segment {
|
|||
char docType = Response.docType(document.dc_format());
|
||||
|
||||
// CREATE SOLR DOCUMENT
|
||||
final SolrInputDocument solrInputDoc = this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex);
|
||||
final SolrInputDocument solrInputDoc = this.fulltext.getSolrSchema().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex);
|
||||
|
||||
// FIND OUT IF THIS IS A DOUBLE DOCUMENT
|
||||
for (YaCySchema[] checkfields: new YaCySchema[][]{
|
||||
|
@ -403,7 +403,7 @@ public class Segment {
|
|||
{YaCySchema.fuzzy_signature_l, YaCySchema.fuzzy_signature_unique_b}}) {
|
||||
YaCySchema checkfield = checkfields[0];
|
||||
YaCySchema uniquefield = checkfields[1];
|
||||
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
|
||||
if (this.fulltext.getSolrSchema().contains(checkfield) && this.fulltext.getSolrSchema().contains(uniquefield)) {
|
||||
// lookup the document with the same signature
|
||||
long signature = ((Long) solrInputDoc.getField(checkfield.getSolrFieldName()).getValue()).longValue();
|
||||
try {
|
||||
|
@ -421,7 +421,7 @@ public class Segment {
|
|||
{YaCySchema.description, YaCySchema.description_unique_b}}) {
|
||||
YaCySchema checkfield = checkfields[0];
|
||||
YaCySchema uniquefield = checkfields[1];
|
||||
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
|
||||
if (this.fulltext.getSolrSchema().contains(checkfield) && this.fulltext.getSolrSchema().contains(uniquefield)) {
|
||||
// lookup in the index for the same title
|
||||
String checkstring = checkfield == YaCySchema.title ? document.dc_title() : document.dc_description();
|
||||
if (checkstring.length() == 0) {
|
||||
|
@ -436,7 +436,7 @@ public class Segment {
|
|||
// switch attribute also in all existing documents (which should be exactly only one!)
|
||||
SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000);
|
||||
for (SolrDocument doc: docs) {
|
||||
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
|
||||
SolrInputDocument sid = this.fulltext.getSolrSchema().toSolrInputDocument(doc);
|
||||
sid.setField(uniquefield.getSolrFieldName(), false);
|
||||
this.fulltext.getSolr().add(sid);
|
||||
}
|
||||
|
@ -448,7 +448,7 @@ public class Segment {
|
|||
}
|
||||
|
||||
// ENRICH DOCUMENT WITH RANKING INFORMATION
|
||||
if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) {
|
||||
if (this.urlCitationIndex != null && this.fulltext.getSolrSchema().contains(YaCySchema.references_i)) {
|
||||
int references = this.urlCitationIndex.count(url.hash());
|
||||
if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* SolrScheme
|
||||
* SolrConfiguration
|
||||
* Copyright 2011 by Michael Peter Christen
|
||||
* First released 14.04.2011 at http://yacy.net
|
||||
*
|
||||
|
@ -33,6 +33,7 @@ import java.util.ArrayList;
|
|||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
|
@ -68,6 +69,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
|
|||
import net.yacy.kelondro.util.Bitfield;
|
||||
import net.yacy.kelondro.util.ByteBuffer;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
|
||||
|
||||
|
@ -87,7 +89,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
|
|||
}
|
||||
|
||||
/**
|
||||
* initialize the scheme with a given configuration file
|
||||
* initialize the schema with a given configuration file
|
||||
* the configuration file simply contains a list of lines with keywords
|
||||
* or keyword = value lines (while value is a custom Solr field name
|
||||
* @param configurationFile
|
||||
|
@ -103,17 +105,37 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
|
|||
YaCySchema f = YaCySchema.valueOf(etr.key());
|
||||
f.setSolrFieldName(etr.getValue());
|
||||
} catch (IllegalArgumentException e) {
|
||||
Log.logFine("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
|
||||
Log.logFine("SolrSchema", "solr schema file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
// check consistency the other way: look if all enum constants in SolrField appear in the configuration file
|
||||
for (YaCySchema field: YaCySchema.values()) {
|
||||
if (this.get(field.name()) == null) {
|
||||
Log.logWarning("SolrScheme", " solr scheme file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
|
||||
Log.logWarning("SolrSchema", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a SolrDocument to a SolrInputDocument.
|
||||
* This is useful if a document from the search index shall be modified and indexed again.
|
||||
* This shall be used as replacement of ClientUtils.toSolrInputDocument because we remove some fields
|
||||
* which are created automatically during the indexing process.
|
||||
* @param doc the solr document
|
||||
* @return a solr input document
|
||||
*/
|
||||
public SolrInputDocument toSolrInputDocument(SolrDocument doc) {
|
||||
SolrInputDocument sid = new SolrInputDocument();
|
||||
Set<String> omitFields = new HashSet<String>();
|
||||
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_0_coordinate");
|
||||
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_1_coordinate");
|
||||
omitFields.add(YaCySchema.author_sxt.getSolrFieldName());
|
||||
for (String name: doc.getFieldNames()) {
|
||||
if (this.contains(name) && !omitFields.contains(name)) sid.addField(name, doc.getFieldValue(name), 1.0f);
|
||||
}
|
||||
return sid;
|
||||
}
|
||||
|
||||
public boolean contains(YaCySchema field) {
|
||||
return this.contains(field.name());
|
||||
|
@ -318,7 +340,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
|
|||
final String id, final CrawlProfile profile, final ResponseHeader responseHeader,
|
||||
final Document document, Condenser condenser, DigestURI referrerURL, String language,
|
||||
IndexCell<CitationReference> citations) {
|
||||
// we use the SolrCell design as index scheme
|
||||
// we use the SolrCell design as index schema
|
||||
final SolrInputDocument doc = new SolrInputDocument();
|
||||
final DigestURI digestURI = DigestURI.toDigestURI(document.dc_source());
|
||||
boolean allAttr = this.isEmpty();
|
||||
|
@ -445,7 +467,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
|
|||
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
|
||||
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
|
||||
|
||||
// get list of all links; they will be shrinked by urls that appear in other fields of the solr scheme
|
||||
// get list of all links; they will be shrinked by urls that appear in other fields of the solr schema
|
||||
Set<MultiProtocolURI> inboundLinks = document.inboundLinks();
|
||||
Set<MultiProtocolURI> outboundLinks = document.outboundLinks();
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ public final class QueryParams {
|
|||
public List<String> facetfields;
|
||||
public int maxfacets;
|
||||
private SolrQuery cachedQuery;
|
||||
private SolrConfiguration solrScheme;
|
||||
private SolrConfiguration solrSchema;
|
||||
|
||||
// the following values are filled during the search process as statistics for the search
|
||||
public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index
|
||||
|
@ -191,9 +191,9 @@ public final class QueryParams {
|
|||
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
|
||||
this.facetfields = new ArrayList<String>();
|
||||
|
||||
this.solrScheme = indexSegment.fulltext().getSolrScheme();
|
||||
this.solrSchema = indexSegment.fulltext().getSolrSchema();
|
||||
for (YaCySchema f: defaultfacetfields) {
|
||||
if (solrScheme.contains(f)) facetfields.add(f.getSolrFieldName());
|
||||
if (solrSchema.contains(f)) facetfields.add(f.getSolrFieldName());
|
||||
}
|
||||
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
|
||||
this.maxfacets = defaultmaxfacets;
|
||||
|
@ -300,9 +300,9 @@ public final class QueryParams {
|
|||
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
|
||||
this.facetfields = new ArrayList<String>();
|
||||
|
||||
this.solrScheme = indexSegment.fulltext().getSolrScheme();
|
||||
this.solrSchema = indexSegment.fulltext().getSolrSchema();
|
||||
for (YaCySchema f: defaultfacetfields) {
|
||||
if (solrScheme.contains(f)) facetfields.add(f.getSolrFieldName());
|
||||
if (solrSchema.contains(f)) facetfields.add(f.getSolrFieldName());
|
||||
}
|
||||
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
|
||||
this.maxfacets = defaultmaxfacets;
|
||||
|
@ -432,7 +432,7 @@ public final class QueryParams {
|
|||
if (this.queryGoal.getIncludeStrings().size() == 0) return null;
|
||||
// construct query
|
||||
final SolrQuery params = new SolrQuery();
|
||||
params.setQuery(this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrScheme()).toString());
|
||||
params.setQuery(this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrSchema()).toString());
|
||||
params.setParam("defType", "edismax");
|
||||
params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
|
||||
params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
|
||||
|
@ -468,7 +468,7 @@ public final class QueryParams {
|
|||
}
|
||||
|
||||
// add author facets
|
||||
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrScheme.contains(YaCySchema.author_sxt)) {
|
||||
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(YaCySchema.author_sxt)) {
|
||||
fq.append(" AND ").append(YaCySchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"');
|
||||
}
|
||||
|
||||
|
|
|
@ -648,7 +648,7 @@ public final class SearchEvent {
|
|||
int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain
|
||||
// wait some time if we did not get so much remote results so far to get a better ranking over remote results
|
||||
// we wait at most 30 milliseconds to get a maximum total waiting time of 300 milliseconds for 10 results
|
||||
long wait = waitTimeRecommendation();
|
||||
long wait = Math.min(waitingtime, waitTimeRecommendation());
|
||||
if ( wait > 0 ) {
|
||||
//System.out.println("*** RWIProcess extra wait: " + wait + "ms; expectedRemoteReferences = " + this.expectedRemoteReferences.get() + ", receivedRemoteReferences = " + this.receivedRemoteReferences.get() + ", initialExpectedRemoteReferences = " + this.maxExpectedRemoteReferences.get());
|
||||
Thread.sleep(wait);
|
||||
|
|
Loading…
Reference in New Issue
Block a user