Full redesign of solr connection architecture. This was done to support

multiple solr cores instead of just one. Therefore it is now necessary
to distinguish between a connection to a solr server (called an 'Instance')
and a connection to a single solr core. One Instance may now have
multiple connector classes assigned to it, each connecting to a single
core.
To support multiple cores it is also necessary to distinguish between
the connection configuration and the configuration of the index schema.
We will have multiple schema configurations in the future, one for
each solr core. As a consequence, the IndexFederated servlet had to be
split into two parts; the Schema editor now lives in the new
IndexSchema servlet.
This commit is contained in:
Michael Peter Christen 2013-02-15 01:38:10 +01:00
parent 4111606654
commit b6de1f42dc
35 changed files with 932 additions and 513 deletions

View File

@ -72,7 +72,7 @@ public class ConfigHeuristics_p {
}
if (post.containsKey("opensearch_off")) sb.setConfig("heuristic.opensearch", false);
if (post.containsKey("discoverosd")) {
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
if (!metafieldNOTavailable) {
OpenSearchConnector osc = new OpenSearchConnector(sb, false);
if (osc.discoverFromSolrIndex(sb)) {
@ -102,24 +102,24 @@ public class ConfigHeuristics_p {
}
if (post.containsKey("setopensearch")) {
// read index scheme table flags
// read index schema table flags
writeopensearchcfg (sb,post);
}
if (post.containsKey("switchsolrfieldson")) {
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
if (metafieldNOTavailable) {
ConfigurationSet.Entry entry;
entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.outboundlinks_tag_txt.name());
entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.outboundlinks_tag_txt.name());
if (entry != null && !entry.enabled()) {
entry.setEnable(true);
}
entry = sb.index.fulltext().getSolrScheme().get(YaCySchema.inboundlinks_tag_txt.name());
entry = sb.index.fulltext().getSolrSchema().get(YaCySchema.inboundlinks_tag_txt.name());
if (entry != null && !entry.enabled()) {
entry.setEnable(true);
}
try {
sb.index.fulltext().getSolrScheme().commit();
sb.index.fulltext().getSolrSchema().commit();
} catch (IOException ex) {}
}
}
@ -139,7 +139,7 @@ public class ConfigHeuristics_p {
}
}
final boolean showmetafieldbutton = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
final boolean showmetafieldbutton = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
if (showmetafieldbutton) prop.put("osdsolrfieldswitch",1);
prop.put("site.checked", sb.getConfigBool("heuristic.site", false) ? 1 : 0);
prop.put("searchresult.checked", sb.getConfigBool("heuristic.searchresults", false) ? 1 : 0);
@ -175,7 +175,7 @@ public class ConfigHeuristics_p {
}
private static void writeopensearchcfg(final Switchboard sb, final serverObjects post) {
// read index scheme table flags
// read index schema table flags
final File f = new File(sb.getDataPath(), "DATA/SETTINGS/heuristicopensearch.conf");
ConfigurationSet cfg = new ConfigurationSet(f);

View File

@ -55,7 +55,7 @@ public class CrawlResults {
final serverObjects prop = new serverObjects();
int lines = 500;
boolean showCollection = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt);
boolean showCollection = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt);
boolean showInit = env.getConfigBool("IndexMonitorInit", false);
boolean showExec = env.getConfigBool("IndexMonitorExec", false);
boolean showDate = env.getConfigBool("IndexMonitorDate", true);

View File

@ -76,7 +76,7 @@ public class CrawlStartExpert_p {
prop.put("xdstopwChecked", env.getConfigBool("xdstopw", true) ? "1" : "0");
prop.put("xpstopwChecked", env.getConfigBool("xpstopw", true) ? "1" : "0");
boolean collectionEnabled = sb.index.fulltext().getSolrScheme().isEmpty() || sb.index.fulltext().getSolrScheme().contains(YaCySchema.collection_sxt);
boolean collectionEnabled = sb.index.fulltext().getSolrSchema().isEmpty() || sb.index.fulltext().getSolrSchema().contains(YaCySchema.collection_sxt);
prop.put("collectionEnabled", collectionEnabled ? 1 : 0);
prop.put("collection", collectionEnabled ? "user" : "");

View File

@ -1,7 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Federated Index</title>
<title>YaCy '#[clientname]#': Remote Solr Configuration</title>
#%env/templates/metas.template%#
</head>
<body id="IndexFederated_p">
@ -12,7 +12,7 @@
</div>
#%env/templates/header.template%#
#%env/templates/submenuIndexControl.template%#
<h2>Federated Index</h2>
<h2>Remote Solr Configuration</h2>
<p>
YaCy supports multiple index storage locations. At this time only the YaCy-internal search index can be used for the YaCy search interface
A Solr index storage location is optional. The local index storage location can be disabled.
@ -64,38 +64,12 @@
<dt class="TableCellDark">Solr URL(s)</dt>
<dd><textarea rows="2" cols="80" name="solr.indexing.url" id="solr.indexing.url"/>#[solr.indexing.url]#</textarea><br/>
You can set one or more Solr targets here which are accessed as a shard. For several targets, list them using a ',' (comma) as separator.</dd>
<dt class="TableCellDark">Commit-Within (milliseconds)</dt>
<dd><input type="text" size="6" maxlength="6" value="#[solr.indexing.commitWithinMs]#" name="solr.indexing.commitWithinMs" id="solr.indexing.commitWithinMs"/> (increase this value to i.e. 180000 - 3 minutes - for more performance)</dd>
<dt class="TableCellDark">Lazy Value Initialization</dt>
<dd><input type="checkbox" name="solr.indexing.lazy" id="solr.indexing.lazy" #(solr.indexing.lazy.checked)#:: checked="checked"#(/solr.indexing.lazy.checked)# /> (if checked, only non-zero values and non-empty strings are written)</dd>
<dt class="TableCellDark">Sharding Method</dt>
<dd><input type="text" size="50" maxlength="50" value="#[solr.indexing.sharding]#" name="solr.indexing.sharding" id="solr.indexing.sharding" disabled="disabled"/></dd>
<dt class="TableCellDark">Scheme</dt>
<dd><input type="text" size="50" maxlength="50" value="#[solr.indexing.schemefile]#" name="solr.indexing.schemefile" id="solr.indexing.schemefile" disabled="disabled"/></dd>
<dt></dt><dd><input type="submit" name="set" value="Set" /></dd>
</dl>
</div>
<div>
<h3>Index Scheme</h3><p>If you use a custom Solr schema you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name</p>
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Active</td>
<td>Attribute</td>
<td>Custom Solr Field Name</td>
<td>Comment</td>
</tr>
#{scheme}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td align="center"><input type="checkbox" name="scheme_#[key]#" value="checked" #(checked)#::checked="checked"#(/checked)#/></td>
<td align="left">#[key]#</td>
<td align="left"><input type="text" name="scheme_solrfieldname_#[key]#" value="#[solrfieldname]#"/></td>
<td align="left">#[comment]#</td>
</tr>
#{/scheme}#
</table>
</div>
</fieldset>
<input type="submit" name="set" value="Set" />
</form>
#%env/templates/footer.template%#

View File

@ -3,10 +3,6 @@
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 25.05.2011 at http://yacy.net
*
* $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $
* $LastChangedRevision: 7653 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
@ -26,17 +22,16 @@ import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.ArrayList;
import org.apache.solr.common.SolrException;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
import net.yacy.cora.federate.solr.connector.ShardSelection;
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.yacy.ConfigurationSet;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.OS;
@ -95,7 +90,6 @@ public class IndexFederated_p {
String solrurls = post.get("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr"));
final boolean solrRemoteIsOnAfterwards = post.getBoolean("solr.indexing.solrremote") & solrurls.length() > 0;
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, solrRemoteIsOnAfterwards);
boolean lazy = post.getBoolean("solr.indexing.lazy");
final BufferedReader r = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(UTF8.getBytes(solrurls))));
final StringBuilder s = new StringBuilder();
String s0;
@ -113,10 +107,7 @@ public class IndexFederated_p {
}
solrurls = s.toString().trim();
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, solrurls);
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, lazy);
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, post.get("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5")));
final String schemename = post.get("solr.indexing.schemefile", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list"));
env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, schemename);
if (solrRemoteWasOn && !solrRemoteIsOnAfterwards) {
// switch off
@ -133,7 +124,8 @@ public class IndexFederated_p {
final boolean usesolr = sb.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) & solrurls.length() > 0;
try {
if (usesolr) {
SolrConnector solr = new ShardSolrConnector(solrurls, ShardSelection.Method.MODULO_HOST_MD5, 10000, true);
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
sb.index.fulltext().connectRemoteSolr(solr);
} else {
sb.index.fulltext().disconnectRemoteSolr();
@ -149,35 +141,6 @@ public class IndexFederated_p {
} catch (SolrException e) {
Log.logSevere("IndexFederated_p", "change of solr connection failed", e);
}
// read index scheme table flags
final Iterator<ConfigurationSet.Entry> i = sb.index.fulltext().getSolrScheme().entryIterator();
ConfigurationSet.Entry entry;
boolean modified = false; // flag to remember changes
while (i.hasNext()) {
entry = i.next();
final String v = post.get("scheme_" + entry.key());
final String sfn = post.get("scheme_solrfieldname_" + entry.key());
if (sfn != null ) {
// set custom solr field name
if (!sfn.equals(entry.getValue())) {
entry.setValue(sfn);
modified = true;
}
}
// set enable flag
final boolean c = v != null && v.equals("checked");
if (entry.enabled() != c) {
entry.setEnable(c);
modified = true;
}
}
if (modified) { // save settings to config file if modified
try {
sb.index.fulltext().getSolrScheme().commit();
modified = false;
} catch (IOException ex) {}
}
}
// show solr host table
@ -187,7 +150,7 @@ public class IndexFederated_p {
prop.put("table", 1);
final SolrConnector solr = sb.index.fulltext().getRemoteSolr();
final long[] size = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getSizeList() : new long[]{((RemoteSolrConnector) solr).getSize()};
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((RemoteSolrConnector) solr).getAdminInterface()};
final String[] urls = (solr instanceof ShardSolrConnector) ? ((ShardSolrConnector) solr).getAdminInterfaceList() : new String[]{((SolrRemoteInstance) ((RemoteSolrConnector) solr).getInstance()).getAdminInterface()};
boolean dark = false;
for (int i = 0; i < size.length; i++) {
prop.put("table_list_" + i + "_dark", dark ? 1 : 0); dark = !dark;
@ -197,34 +160,12 @@ public class IndexFederated_p {
prop.put("table_list", size.length);
}
// write scheme
final String schemename = sb.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list");
int c = 0;
boolean dark = false;
// use enum SolrField to keep defined order
for(YaCySchema field : YaCySchema.values()) {
prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
prop.put("scheme_" + c + "_checked", sb.index.fulltext().getSolrScheme().contains(field.name()) ? 1 : 0);
prop.putHTML("scheme_" + c + "_key", field.name());
prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment());
c++;
}
prop.put("scheme", c);
// fill attribute fields
// allowed values are: classic, solr, off
// federated.service.yacy.indexing.engine = classic
prop.put(SwitchboardConstants.CORE_SERVICE_FULLTEXT + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_FULLTEXT, false) ? 1 : 0);
prop.put(SwitchboardConstants.CORE_SERVICE_RWI + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, false) ? 1 : 0);
prop.put(SwitchboardConstants.CORE_SERVICE_CITATION + ".checked", env.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, false) ? 1 : 0);
prop.put("solr.indexing.solrremote.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_ENABLED, false) ? 1 : 0);
prop.put("solr.indexing.url", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr").replace(",", "\n"));
prop.put("solr.indexing.lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 1 : 0);
prop.put("solr.indexing.sharding", env.getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SHARDING, "modulo-host-md5"));
prop.put("solr.indexing.schemefile", schemename);
if ((sb.index.fulltext().connectedURLDb())) {
prop.put("migrateUrlDbtoSolr", 1);

48
htroot/IndexSchema_p.html Normal file
View File

@ -0,0 +1,48 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Solr Schema Editor</title>
#%env/templates/metas.template%#
</head>
<body id="IndexSchema_p">
<div id="api">
<a href="/api/schema.xml">
<img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
<span>The solr schema can also be retrieved as xml here. Click the API icon to see the xml. Just copy this xml to solr/conf/schema.xml to configure solr.</span>
</div>
#%env/templates/header.template%#
#%env/templates/submenuIndexControl.template%#
<h2>Solr Schema Editor</h2>
<p>If you use a custom Solr schema you may enter a different field name in the column 'Custom Solr Field Name' of the YaCy default attribute name</p>
<form action="IndexSchema_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
<fieldset>
<div>
<table class="sortable" border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Active</td>
<td>Attribute</td>
<td>Custom Solr Field Name</td>
<td>Comment</td>
</tr>
#{schema}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td align="center"><input type="checkbox" name="schema_#[key]#" value="checked" #(checked)#::checked="checked"#(/checked)#/></td>
<td align="left">#[key]#</td>
<td align="left"><input type="text" name="schema_solrfieldname_#[key]#" value="#[solrfieldname]#"/></td>
<td align="left">#[comment]#</td>
</tr>
#{/schema}#
</table>
<dl>
<dt class="TableCellDark">Lazy Value Initialization</dt>
<dd><input type="checkbox" name="lazy" id="lazy" #(lazy.checked)#:: checked="checked"#(/lazy.checked)# /> (if checked, only non-zero values and non-empty strings are written)</dd>
</dl>
</div>
</fieldset>
<input type="submit" name="set" value="Set" />
</form>
#%env/templates/footer.template%#
</body>
</html>

92
htroot/IndexSchema_p.java Normal file
View File

@ -0,0 +1,92 @@
/**
* IndexSchema_p
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 13.02.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.util.Iterator;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.yacy.ConfigurationSet;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet behind the "Solr Schema Editor" page (IndexSchema_p.html).
 *
 * When the form was submitted (parameter "set" present) the per-field enable
 * flags and custom solr field names are applied to the solr schema
 * configuration and the "lazy" setting is stored. In every case the table of
 * all {@link YaCySchema} fields is rendered into the returned properties.
 */
public class IndexSchema_p {

    /**
     * Processes an optional form submission and fills the template properties.
     *
     * @param header the request header (not used)
     * @param post   the submitted form values; may be null when the page is just displayed
     * @param env    the server environment; it is cast to {@link Switchboard}
     * @return the properties used to fill the IndexSchema_p.html template
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // collects all template replacements
        final serverObjects prop = new serverObjects();
        final Switchboard sb = (Switchboard) env;

        if (post != null && post.containsKey("set")) {
            // transfer the submitted table into the schema configuration
            boolean changed = false; // becomes true as soon as any entry differs from the form
            for (final Iterator<ConfigurationSet.Entry> entries = sb.index.fulltext().getSolrSchema().entryIterator(); entries.hasNext();) {
                final ConfigurationSet.Entry schemaEntry = entries.next();
                final String checkbox = post.get("schema_" + schemaEntry.key());
                final String customName = post.get("schema_solrfieldname_" + schemaEntry.key());

                // take over the custom solr field name if one was submitted and it differs
                if (customName != null && !customName.equals(schemaEntry.getValue())) {
                    schemaEntry.setValue(customName);
                    changed = true;
                }

                // an absent parameter or any value other than "checked" means: disabled
                final boolean enable = "checked".equals(checkbox);
                if (schemaEntry.enabled() != enable) {
                    schemaEntry.setEnable(enable);
                    changed = true;
                }
            }

            // write the configuration only if something was actually modified
            if (changed) {
                try {
                    sb.index.fulltext().getSolrSchema().commit();
                } catch (IOException ex) {
                    // a failed commit is silently ignored here
                }
            }

            env.setConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, post.getBoolean("lazy"));
        }

        // render one table row per schema field; iterating the enum keeps its defined order
        int row = 0;
        for (final YaCySchema field : YaCySchema.values()) {
            final String cell = "schema_" + row + "_";
            final String solrName = field.getSolrFieldName();
            prop.put(cell + "dark", row % 2); // rows alternate, starting with the light one
            prop.put(cell + "checked", sb.index.fulltext().getSolrSchema().contains(field.name()) ? 1 : 0);
            prop.putHTML(cell + "key", field.name());
            // the custom name column stays empty when it equals the default name
            prop.putHTML(cell + "solrfieldname", field.name().equalsIgnoreCase(solrName) ? "" : solrName);
            if (field.getComment() != null) {
                prop.putHTML(cell + "comment", field.getComment());
            }
            row++;
        }
        prop.put("schema", row);

        prop.put("lazy.checked", env.getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true) ? 1 : 0);

        // hand the replacements back to the template engine
        return prop;
    }
}

View File

@ -39,11 +39,11 @@ public class schema {
final servletProperties prop = new servletProperties();
final Switchboard sb = (Switchboard) env;
// write scheme
// write schema
int c = 0;
SolrConfiguration solrScheme = sb.index.fulltext().getSolrScheme();
SolrConfiguration solrSchema = sb.index.fulltext().getSolrSchema();
for (YaCySchema field : YaCySchema.values()) {
if (solrScheme.contains(field.name())) {
if (solrSchema.contains(field.name())) {
addField(prop, c, field);
c++;
}
@ -51,13 +51,13 @@ public class schema {
//if (solrScheme.contains(YaCySchema.author)) {addField(prop, c, YaCySchema.author_sxt);}
prop.put("fields", c);
prop.put("copyFieldAuthor", solrScheme.contains(YaCySchema.author) ? 1 : 0);
prop.put("copyFieldAuthor", solrSchema.contains(YaCySchema.author) ? 1 : 0);
prop.put("solruniquekey",YaCySchema.id.getSolrFieldName());
prop.put("solrdefaultsearchfield",
solrScheme.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() :
solrScheme.contains(YaCySchema.fuzzy_signature_text_t) ? YaCySchema.fuzzy_signature_text_t.getSolrFieldName() :
solrScheme.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() :
solrSchema.contains(YaCySchema.text_t) ? YaCySchema.text_t.getSolrFieldName() :
solrSchema.contains(YaCySchema.fuzzy_signature_text_t) ? YaCySchema.fuzzy_signature_text_t.getSolrFieldName() :
solrSchema.contains(YaCySchema.h1_txt) ? YaCySchema.h1_txt.getSolrFieldName() :
YaCySchema.id.getSolrFieldName()
);

View File

@ -2,7 +2,8 @@
<h3>Index Administration</h3>
<ul class="SubMenu">
<li><a href="/IndexControlURLs_p.html" class="MenuItemLink lock">Database Administration</a></li>
<li><a href="/IndexFederated_p.html" class="MenuItemLink lock">Federated Solr Index</a></li>
<li><a href="/IndexSchema_p.html" class="MenuItemLink lock">Solr Schema Editor</a></li>
<li><a href="/IndexFederated_p.html" class="MenuItemLink lock">Remote Solr Configuration</a></li>
#(p2p)#::<li><a href="/IndexControlRWIs_p.html" class="MenuItemLink lock">Reverse Word Index Administration</a></li>#(/p2p)#
<!--<li><a href="/IndexControlCleaner_p.html" class="MenuItemLink lock">Index Cleaner</a></li>-->
<li><a href="/ConfigHTCache_p.html" class="MenuItemLink lock">Web Cache</a></li>

View File

@ -114,7 +114,7 @@ public class searchresult {
// get a solr query string
QueryGoal qg = new QueryGoal(originalQuery, originalQuery);
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrScheme());
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrSchema());
post.put("defType", "edismax");
post.put(CommonParams.Q, solrQ.toString());
post.put(CommonParams.ROWS, post.remove("num"));

View File

@ -180,7 +180,7 @@ public class OpenSearchConnector {
Log.logSevere("OpenSearchConnector.Discover", "Error on connecting to embedded Solr index");
return false;
}
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrScheme().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
final boolean metafieldNOTavailable = sb.index.fulltext().getSolrSchema().containsDisabled(YaCySchema.outboundlinks_tag_txt.name());
if (metafieldNOTavailable) {
Log.logWarning("OpenSearchConnector.Discover", "Solr Schema field outboundlinks_tag_txt must be switched on");
return false;

View File

@ -27,7 +27,7 @@ public interface Schema {
* this shall be implemented as enum, thus shall have the name() method
* @return the name of the enum constant
*/
public String name(); // default field name (according to SolCell default scheme) <= enum.name()
public String name(); // default field name (according to SolCell default schema) <= enum.name()
public String getSolrFieldName(); // return the default or custom solr field name to use for solr requests

View File

@ -352,24 +352,5 @@ public enum YaCySchema implements Schema {
doc.setField(this.getSolrFieldName(), value);
}
/**
* Convert a SolrDocument to a SolrInputDocument.
* This is useful if a document from the search index shall be modified and indexed again.
* This shall be used as replacement of ClientUtils.toSolrInputDocument because we remove some fields
* which are created automatically during the indexing process.
* @param doc the solr document
* @return a solr input document
*/
public static SolrInputDocument toSolrInputDocument(SolrDocument doc) {
SolrInputDocument sid = new SolrInputDocument();
Set<String> omitFields = new HashSet<String>();
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_0_coordinate");
omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_1_coordinate");
omitFields.add(YaCySchema.author_sxt.getSolrFieldName());
for (String name: doc.getFieldNames()) {
if (!omitFields.contains(name)) sid.addField(name, doc.getFieldValue(name), 1.0f);
}
return sid;
}
}

View File

@ -213,12 +213,12 @@ public abstract class AbstractSolrConnector implements SolrConnector {
params.setRows(0);
params.setStart(0);
params.setFacet(false);
//params.setFields(YaCySchema.id.getSolrFieldName());
params.setFields(YaCySchema.id.getSolrFieldName());
// query the server
QueryResponse rsp = query(params);
final SolrDocumentList docs = rsp.getResults();
return docs.getNumFound();
return docs == null ? 0 : docs.getNumFound();
}
/**

View File

@ -21,15 +21,16 @@
package net.yacy.cora.federate.solr.connector;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import net.yacy.cora.federate.solr.instance.SolrEmbeddedInstance;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
@ -39,113 +40,113 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import com.google.common.io.Files;
import org.apache.solr.servlet.SolrRequestParsers;
public class EmbeddedSolrConnector extends SolrServerConnector implements SolrConnector {
public static final String SELECT = "/select";
public static final String CONTEXT = "/solr";
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};
private CoreContainer cores;
private String defaultCoreName;
private SolrCore defaultCore;
private final SearchHandler requestHandler;
private final File storagePath;
private final SolrEmbeddedInstance instance;
private SolrCore core;
public EmbeddedSolrConnector(File storagePath, File solr_config) throws IOException {
public EmbeddedSolrConnector(SolrEmbeddedInstance instance) {
super();
// copy the solrconfig.xml to the storage path
this.storagePath = storagePath;
File conf = new File(storagePath, "conf");
conf.mkdirs();
File source, target;
for (String cf: confFiles) {
source = new File(solr_config, cf);
if (source.isDirectory()) {
target = new File(conf, cf);
target.mkdirs();
for (String cfl: source.list()) {
try {
Files.copy(new File(source, cfl), new File(target, cfl));
} catch (IOException e) {
e.printStackTrace();
}
}
} else {
target = new File(conf, cf);
target.getParentFile().mkdirs();
try {
Files.copy(source, target);
} catch (IOException e) {
e.printStackTrace();
}
}
}
this.cores = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml")); // this may take indefinitely long if solr files are broken
if (this.cores == null) {
// try again
System.gc();
this.cores = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml"));
}
this.defaultCoreName = this.cores.getDefaultCoreName();
Log.logInfo("EmbeddedSolrConnector", "detected default solr core: " + this.defaultCoreName);
this.defaultCore = this.cores.getCore(this.defaultCoreName); // should be "collection1"
if (this.defaultCore == null) {
// try again
Collection<SolrCore> cores = this.cores.getCores();
if (cores.size() > 0) {
this.defaultCore = cores.iterator().next();
this.defaultCoreName = this.defaultCore.getName();
}
}
if (this.defaultCore == null) {
throw new IOException("cannot get the default core; available = " + MemoryControl.available() + ", free = " + MemoryControl.free());
}
final NamedList<Object> config = new NamedList<Object>();
this.instance = instance;
this.core = this.instance.getDefaultCore();
this.requestHandler = new SearchHandler();
this.requestHandler.init(config);
this.requestHandler.inform(this.defaultCore);
super.init(new EmbeddedSolrServer(this.cores, this.defaultCoreName));
this.requestHandler.init(new NamedList<Object>());
this.requestHandler.inform(this.core);
super.init(this.instance.getDefaultServer());
}
public File getStoragePath() {
return this.storagePath;
public EmbeddedSolrConnector(SolrEmbeddedInstance instance, String coreName) {
super();
this.instance = instance;
this.core = this.instance.getCore(coreName);
this.requestHandler = new SearchHandler();
this.requestHandler.init(new NamedList<Object>());
this.requestHandler.inform(this.core);
super.init(this.instance.getServer(coreName));
}
public SolrInstance getInstance() {
return this.instance;
}
public SolrCore getCore() {
return this.defaultCore;
return this.core;
}
public SolrConfig getConfig() {
return this.defaultCore.getSolrConfig();
return this.core.getSolrConfig();
}
private static final SolrRequestParsers _parser = new SolrRequestParsers(null);
/**
* get the size of the index. We override the implementation in SolrServerConnector
* because we can do this more efficiently in a different way for embedded indexes.
*/
@Override
public long getSize() {
// do some magic here to prevent the super.getSize() call which is a bad hack
return super.getSize();
String threadname = Thread.currentThread().getName();
Thread.currentThread().setName("solr query: size");
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
CoreContainer coreContainer = ess.getCoreContainer();
String coreName = coreContainer.getDefaultCoreName();
SolrCore core = coreContainer.getCore(coreName);
if (core == null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "No such core: " + coreName);
try {
SolrParams params = AbstractSolrConnector.catchSuccessQuery;
QueryRequest request = new QueryRequest(AbstractSolrConnector.catchSuccessQuery);
SolrQueryRequest req = _parser.buildRequestFrom(core, params, request.getContentStreams());
String path = "/select";
req.getContext().put("path", path);
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
SolrRequestHandler handler = core.getRequestHandler(path);
SearchHandler sh = (SearchHandler) handler;
List<SearchComponent> components = sh.getComponents();
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
QueryComponent qc = (QueryComponent) components.get(0);
qc.prepare(rb);
qc.process(rb);
qc.finishStage(rb);
int hits = rb.getResults().docList.matches();
if (req != null) req.close();
core.close();
SolrRequestInfo.clearRequestInfo();
Thread.currentThread().setName(threadname);
return hits;
} catch (final Throwable e) {
log.warn(e);
Thread.currentThread().setName(threadname);
return 0;
}
}
@Override
public synchronized void close() {
try {this.commit(false);} catch (Throwable e) {Log.logException(e);}
try {super.close();} catch (Throwable e) {Log.logException(e);}
try {this.defaultCore.close();} catch (Throwable e) {Log.logException(e);}
try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);}
try {this.core.close();} catch (Throwable e) {Log.logException(e);}
}
public SolrQueryRequest request(final SolrParams params) {
SolrQueryRequest req = null;
req = new SolrQueryRequestBase(this.defaultCore, params){};
req = new SolrQueryRequestBase(this.core, params){};
req.getContext().put("path", SELECT);
req.getContext().put("webapp", CONTEXT);
return req;

View File

@ -21,11 +21,14 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
@ -51,8 +54,8 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
private final static Object EXIST = new Object();
private SolrConnector solr0;
private SolrConnector solr1;
private EmbeddedSolrConnector solr0;
private ShardSolrConnector solr1;
private int hitCacheMax, missCacheMax, partitions;
private final Map<String, HitMissCache> hitMissCache;
private final Map<String, ARC<String, Object>> fieldCache; // a map from a field name to a id-key/value object cache
@ -106,11 +109,18 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return c;
}
/**
 * Not supported by this connector: according to the original note, callers
 * should use getSolr0().getInstance() instead.
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
public SolrInstance getInstance() {
    // carry the advice in the exception itself so that it shows up in stack traces and logs
    throw new UnsupportedOperationException("the usage of getInstance is not possible here, use getSolr0().getInstance() instead");
}
/**
* Check if a connector is currently assigned to the solr0 slot.
* @return true if solr0 is set, false if it is null
*/
public boolean isConnected0() {
return this.solr0 != null;
}
public void connect0(SolrConnector c) {
public void connect0(EmbeddedSolrConnector c) {
this.solr0 = c;
}
@ -120,7 +130,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
/**
 * Detach the solr0 connector: the connector is closed first, then the
 * instance it was attached to is closed, and finally the slot is cleared.
 * Does nothing if no solr0 connector is assigned.
 */
public void disconnect0() {
    if (this.solr0 != null) {
        // the instance must be fetched before the connector is closed
        final SolrInstance attachedInstance = this.solr0.getInstance();
        this.solr0.close();
        attachedInstance.close();
        this.solr0 = null;
    }
}
@ -128,7 +140,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return this.solr1 != null;
}
public void connect1(SolrConnector c) {
public void connect1(ShardSolrConnector c) {
this.solr1 = c;
}
@ -138,6 +150,7 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
/**
 * Detach the solr1 connector: the connector is closed and the slot is cleared.
 * Does nothing if no solr1 connector is assigned.
 */
public void disconnect1() {
    if (this.solr1 != null) {
        // we cannot get the instance here because that is not applicable
        // NOTE(review): close() of this class does close the instances behind solr1;
        // confirm whether they should be closed here as well
        this.solr1.close();
        this.solr1 = null;
    }
}
@ -165,8 +178,16 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public synchronized void close() {
if (this.solr0 != null) this.solr0.close();
if (this.solr1 != null) this.solr1.close();
if (this.solr0 != null) {
SolrInstance instance = this.solr0.getInstance();
this.solr0.close();
instance.close();
}
if (this.solr1 != null) {
ArrayList<SolrRemoteInstance> instances = this.solr1.getInstances();
this.solr1.close();
for (SolrRemoteInstance instance: instances) instance.close();
}
}
/**

View File

@ -24,6 +24,9 @@ import java.io.IOException;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@ -37,20 +40,20 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
private final AddWorker[] worker;
private final SolrConnector solr;
public MultipleSolrConnector(final String url, final int connections) throws IOException {
this.solr = new RemoteSolrConnector(url);
public MultipleSolrConnector(final SolrRemoteInstance instance, final String corename, final int connections) {
this.solr = new RemoteSolrConnector(instance, corename);
this.queue = new ArrayBlockingQueue<SolrInputDocument>(1000);
this.worker = new AddWorker[connections];
for (int i = 0; i < connections; i++) {
this.worker[i] = new AddWorker(url);
this.worker[i] = new AddWorker(instance, corename);
this.worker[i].start();
}
}
private class AddWorker extends Thread {
private final SolrConnector solr;
public AddWorker(final String url) throws IOException {
this.solr = new RemoteSolrConnector(url);
public AddWorker(final SolrRemoteInstance instance, final String corename) {
this.solr = new RemoteSolrConnector(instance, corename);
}
@Override
public void run() {

View File

@ -22,145 +22,55 @@ package net.yacy.cora.federate.solr.connector;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.logging.Log;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import org.apache.commons.httpclient.HttpException;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.apache.solr.client.solrj.ResponseParser;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
public class RemoteSolrConnector extends SolrServerConnector implements SolrConnector {
private final String solrurl, host, solrpath, solraccount, solrpw;
private DefaultHttpClient client;
private final int port;
SolrRemoteInstance instance;
String corename;
/**
* create a new solr connector
* @param url the solr url, like http://192.168.1.60:8983/solr/ or http://admin:pw@192.168.1.60:8983/solr/
* @param scheme
* @param instance the instance of the remote solr url, like http://192.168.1.60:8983/solr/ or http://admin:pw@192.168.1.60:8983/solr/
* @throws IOException
*/
public RemoteSolrConnector(final String url) throws IOException {
public RemoteSolrConnector(final SolrRemoteInstance instance) throws IOException {
super();
this.solrurl = url;
// connect using authentication
final MultiProtocolURI u = new MultiProtocolURI(this.solrurl);
this.host = u.getHost();
this.port = u.getPort();
this.solrpath = u.getPath();
final String userinfo = u.getUserInfo();
if (userinfo == null || userinfo.isEmpty()) {
this.solraccount = ""; this.solrpw = "";
} else {
final int p = userinfo.indexOf(':');
if (p < 0) {
this.solraccount = userinfo; this.solrpw = "";
} else {
this.solraccount = userinfo.substring(0, p); this.solrpw = userinfo.substring(p + 1);
}
}
HttpSolrServer s;
if (this.solraccount.length() > 0) {
PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); // try also: ThreadSafeClientConnManager
cm.setMaxTotal(100);
this.client = new DefaultHttpClient(cm) {
@Override
protected HttpContext createHttpContext() {
HttpContext context = super.createHttpContext();
AuthCache authCache = new BasicAuthCache();
BasicScheme basicAuth = new BasicScheme();
HttpHost targetHost = new HttpHost(u.getHost(), u.getPort(), u.getProtocol());
authCache.put(targetHost, basicAuth);
context.setAttribute(ClientContext.AUTH_CACHE, authCache);
return context;
}
};
this.client.addRequestInterceptor(new HttpRequestInterceptor() {
@Override
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
if (!request.containsHeader("Accept-Encoding")) request.addHeader("Accept-Encoding", "gzip");
}
});
this.client.addResponseInterceptor(new HttpResponseInterceptor() {
@Override
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
HttpEntity entity = response.getEntity();
if (entity != null) {
Header ceheader = entity.getContentEncoding();
if (ceheader != null) {
HeaderElement[] codecs = ceheader.getElements();
for (HeaderElement codec : codecs) {
if (codec.getName().equalsIgnoreCase("gzip")) {
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
return;
}
}
}
}
}
});
BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(new AuthScope(this.host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(this.solraccount, this.solrpw));
this.client.setCredentialsProvider(credsProvider);
String p = "http://" + this.host + ":" + this.port + this.solrpath;
Log.logInfo("RemoteSolrConnector", "connecting Solr authenticated with url:" + p);
s = new HttpSolrServer(p, this.client);
} else {
Log.logInfo("RemoteSolrConnector", "connecting Solr with url:" + this.solrurl);
s = new HttpSolrServer(this.solrurl);
}
s.setAllowCompression(true);
s.setConnectionTimeout(60000);
s.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated)
s.setSoTimeout(60000);
this.instance = instance;
this.corename = this.instance.getDefaultCoreName();
SolrServer s = instance.getServer(this.corename);
super.init(s);
}
public void terminate() {
if (this.client != null) this.client.getConnectionManager().shutdown();
public RemoteSolrConnector(final SolrRemoteInstance instance, String corename) {
super();
this.instance = instance;
this.corename = corename == null ? this.instance.getDefaultCoreName() : corename;
SolrServer s = instance.getServer(this.corename);
super.init(s);
}
/**
* Get the remote instance which was given to the constructor; the solr
* server objects used by this connector are requested from that instance.
* @return the remote instance behind this connector
*/
public SolrInstance getInstance() {
return this.instance;
}
@Override
public synchronized void close() {
super.close();
this.terminate();
}
@Override
public QueryResponse query(ModifiableSolrParams params) throws IOException {
try {
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
String q = params.get("q");
String threadname = Thread.currentThread().getName();
@ -170,32 +80,30 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
ResponseParser responseParser = new XMLResponseParser();
request.setResponseParser(responseParser);
long t = System.currentTimeMillis();
NamedList<Object> result = server.request(request);
NamedList<Object> result;
try {
result = server.request(request);
} catch (Throwable e) {
server = instance.getServer(this.corename);
super.init(server);
try {
result = server.request(request);
} catch (Throwable e1) {
throw new IOException(e1.getMessage());
}
}
QueryResponse response = new QueryResponse(result, server);
response.setElapsedTime(System.currentTimeMillis() - t);
if (q != null) Thread.currentThread().setName(threadname);
return response;
} catch (Throwable e) {
throw new IOException("Error executing query", e);
}
}
public String getAdminInterface() {
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress();
String u = this.solrurl;
int p = u.indexOf("localhost",0);
if (p < 0) p = u.indexOf("127.0.0.1",0);
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
return u + (u.endsWith("/") ? "admin/" : "/admin/");
}
public static void main(final String args[]) {
RemoteSolrConnector solr;
try {
solr = new RemoteSolrConnector("http://127.0.0.1:8983/solr");
SolrRemoteInstance instance = new SolrRemoteInstance("http://127.0.0.1:8983/solr/");
solr = new RemoteSolrConnector(instance, "solr");
solr.clear();
final File exampleDir = new File("test/parsertest/");
long t, t0, a = 0;
@ -211,8 +119,10 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
System.out.println("pushed file " + s + " to solr, " + t0 + " milliseconds");
}
System.out.println("pushed " + c + " files in " + a + " milliseconds, " + (a / c) + " milliseconds average; " + (60000 / a * c) + " PPM");
solr.commit(false);
} catch (final IOException e) {
e.printStackTrace();
}
System.exit(0);
}
}

View File

@ -23,6 +23,8 @@ package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.List;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;

View File

@ -1,5 +1,5 @@
/**
* SolrChardingConnector
* ShardSolrConnector
* Copyright 2011 by Michael Peter Christen
* First released 25.05.2011 at http://yacy.net
*
@ -21,7 +21,6 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -30,32 +29,67 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
public class ShardSolrConnector extends AbstractSolrConnector implements SolrConnector {
private final List<SolrConnector> connectors;
private final ArrayList<SolrRemoteInstance> instances;
private final ArrayList<SolrConnector> connectors;
private final ShardSelection sharding;
private final String[] urls;
private final String[] adminInterfaces;
public ShardSolrConnector(final String urlList, final ShardSelection.Method method, final long timeout, boolean multipleConnections) throws IOException {
urlList.replace(' ', ',');
this.urls = urlList.split(",");
public ShardSolrConnector(
ArrayList<SolrRemoteInstance> instances,
final ShardSelection.Method method, boolean multipleConnections) {
this.instances = instances;
this.connectors = new ArrayList<SolrConnector>();
SolrConnector s;
for (final String u: this.urls) {
s = multipleConnections ? new MultipleSolrConnector(u.trim(), 2) : new RemoteSolrConnector(u.trim());
this.connectors.add(new RetrySolrConnector(s, timeout));
this.adminInterfaces = new String[instances.size()];
int c = 0;
String defaultCoreName = instances.get(0).getDefaultCoreName();
for (final SolrRemoteInstance instance: instances) {
adminInterfaces[c++] = instance.getAdminInterface();
s = multipleConnections ? new MultipleSolrConnector(instance, defaultCoreName, 2) : new RemoteSolrConnector(instance, defaultCoreName);
this.connectors.add(s /*new RetrySolrConnector(s, timeout)*/);
}
this.sharding = new ShardSelection(method, this.urls.length);
this.sharding = new ShardSelection(method, this.connectors.size());
}
/**
 * Create one remote instance for every url in a list of solr urls.
 * @param urlList solr urls, separated by commas and/or whitespace
 * @return the instances in the order of the urls in the list
 * @throws IOException if the list contains no url or if an instance cannot be created
 */
public static ArrayList<SolrRemoteInstance> getShardInstances(final String urlList) throws IOException {
    final ArrayList<SolrRemoteInstance> instances = new ArrayList<SolrRemoteInstance>();
    for (final String url: splitUrlList(urlList)) {
        instances.add(new SolrRemoteInstance(url));
    }
    return instances;
}

/**
 * Create one remote instance for every url in a list of solr urls, with explicitly given core names.
 * @param urlList solr urls, separated by commas and/or whitespace
 * @param coreNames the core names handed to every instance
 * @param defaultCoreName the default core name handed to every instance
 * @return the instances in the order of the urls in the list
 * @throws IOException if the list contains no url or if an instance cannot be created
 */
public static ArrayList<SolrRemoteInstance> getShardInstances(final String urlList, Collection<String> coreNames, String defaultCoreName) throws IOException {
    final ArrayList<SolrRemoteInstance> instances = new ArrayList<SolrRemoteInstance>();
    for (final String url: splitUrlList(urlList)) {
        instances.add(new SolrRemoteInstance(url, coreNames, defaultCoreName));
    }
    return instances;
}

/**
 * Split a url list at commas and whitespace into its non-empty urls.
 * The former code called urlList.replace(' ', ',') and discarded the result (strings are
 * immutable), so blank-separated urls were never split and blanks around a comma stayed
 * inside the url.
 */
private static List<String> splitUrlList(final String urlList) throws IOException {
    final List<String> urls = new ArrayList<String>();
    for (final String token: urlList.split("[\\s,]+")) {
        // a leading separator yields one empty token; never create an instance from it
        if (!token.isEmpty()) urls.add(token);
    }
    // the ShardSolrConnector constructor reads instances.get(0), so an empty result must not be passed on
    if (urls.isEmpty()) throw new IOException("no solr url found in url list '" + urlList + "'");
    return urls;
}
/**
* Get the remote instances which were given to the constructor.
* The internal list itself is returned, not a copy.
* @return the list of remote instances of this shard connector
*/
public ArrayList<SolrRemoteInstance> getInstances() {
return this.instances;
}
@Override
@ -67,6 +101,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
* force an explicit merge of segments
* @param maxSegments the maximum number of segments. Set to 1 for maximum optimization
*/
@Override
public void optimize(int maxSegments) {
for (final SolrConnector connector: this.connectors) connector.optimize(maxSegments);
}
@ -185,11 +220,70 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
@Override
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
final SimpleOrderedMap<Object> facet_countsAcc = new SimpleOrderedMap<Object>();
final SimpleOrderedMap<Object> highlightingAcc = new SimpleOrderedMap<Object>();
final SimpleOrderedMap<Object> headerAcc = new SimpleOrderedMap<Object>();
final SolrDocumentList resultsAcc = new SolrDocumentList();
// concurrently call all shards
List<Thread> t = new ArrayList<Thread>();
for (final SolrConnector connector: this.connectors) {
QueryResponse rsp = connector.query(query);
if (rsp != null && rsp.getResults().size() > 0) return rsp;
Thread t0 = new Thread() {
@SuppressWarnings("unchecked")
@Override
public void run() {
QueryResponse rsp;
try {
rsp = connector.query(query);
} catch (Throwable e) {return;}
NamedList<Object> response = rsp.getResponse();
// set the header; this is mostly always the same (well this is not evaluated much)
SimpleOrderedMap<Object> header = (SimpleOrderedMap<Object>) response.get("responseHeader");
//Integer status = (Integer) header.get("status");
//Integer QTime = (Integer) header.get("QTime");
//SimpleOrderedMap<Object> params = (SimpleOrderedMap<Object>) header.get("params");
if (headerAcc.size() == 0) {
for (Map.Entry<String, Object> e: header) headerAcc.add(e.getKey(), e.getValue());
}
return new QueryResponse();
// accumulate the results
SolrDocumentList results = (SolrDocumentList) response.get("response");
long found = results.size();
for (int i = 0; i < found; i++) resultsAcc.add(results.get(i));
resultsAcc.setNumFound(resultsAcc.getNumFound() + results.getNumFound());
resultsAcc.setMaxScore(Math.max(resultsAcc.getMaxScore() == null ? 0f : resultsAcc.getMaxScore().floatValue(), results.getMaxScore() == null ? 0f : results.getMaxScore().floatValue()));
// accumulate the highlighting
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) response.get("highlighting");
if (highlighting != null) {
for (Map.Entry<String, Object> e: highlighting) highlightingAcc.add(e.getKey(), e.getValue());
}
// accumulate the facets (well this is not correct at this time...)
SimpleOrderedMap<Object> facet_counts = (SimpleOrderedMap<Object>) response.get("facet_counts");
if (facet_counts != null) {
for (Map.Entry<String, Object> e: facet_counts) facet_countsAcc.add(e.getKey(), e.getValue());
}
}
};
t0.start();
t.add(t0);
}
for (Thread t0: t) {
try {t0.join();} catch (InterruptedException e) {}
}
// prepare combined response
QueryResponse rspAcc = new QueryResponse();
NamedList<Object> nl = new NamedList<Object>();
nl.add("responseHeader", headerAcc);
nl.add("response", resultsAcc);
if (highlightingAcc != null && highlightingAcc.size() > 0) nl.add("highlighting", highlightingAcc);
if (facet_countsAcc != null && facet_countsAcc.size() > 0) nl.add("facet_counts", facet_countsAcc);
rspAcc.setResponse(nl);
return rspAcc;
}
@Override
@ -250,18 +344,7 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
}
public String[] getAdminInterfaceList() {
final String[] urlAdmin = new String[this.connectors.size()];
int i = 0;
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
final String localhostExtern = localhostExternAddress == null ? Domains.LOCALHOST : localhostExternAddress.getHostAddress();
for (String u: this.urls) {
int p = u.indexOf("localhost",0);
if (p < 0) p = u.indexOf("127.0.0.1",0);
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
urlAdmin[i++] = u + (u.endsWith("/") ? "admin/" : "/admin/");
}
return urlAdmin;
return this.adminInterfaces;
}
}

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import net.yacy.cora.federate.solr.instance.SolrInstance;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.client.solrj.response.QueryResponse;

View File

@ -28,28 +28,15 @@ import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.kelondro.logging.Log;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.servlet.SolrRequestParsers;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
@ -74,9 +61,9 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
//if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).getHttpClient().getConnectionManager().closeExpiredConnections();
try {
this.server.commit(true, true, softCommit);
if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).shutdown();
//if (this.server instanceof HttpSolrServer) ((HttpSolrServer) this.server).shutdown();
} catch (Throwable e) {
Log.logException(e);
//Log.logException(e);
}
}
@ -95,67 +82,15 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
@Override
public synchronized void close() {
try {
if (this.server != null) synchronized (this.server) {this.server.commit(true, true, false);}
if (this.server != null && this.server instanceof EmbeddedSolrServer) synchronized (this.server) {this.server.commit(true, true, false);}
this.server = null;
} catch (Throwable e) {
Log.logException(e);
}
}
private static final SolrRequestParsers _parser = new SolrRequestParsers(null);
@Override
public long getSize() {
String threadname = Thread.currentThread().getName();
Thread.currentThread().setName("solr query: size");
if (this.server instanceof EmbeddedSolrServer) {
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
CoreContainer coreContainer = ess.getCoreContainer();
String coreName = coreContainer.getDefaultCoreName();
SolrCore core = coreContainer.getCore(coreName);
if (core == null) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "No such core: " + coreName);
try {
SolrParams params = AbstractSolrConnector.catchSuccessQuery;
QueryRequest request = new QueryRequest(AbstractSolrConnector.catchSuccessQuery);
SolrQueryRequest req = _parser.buildRequestFrom(core, params, request.getContentStreams());
String path = "/select";
req.getContext().put("path", path);
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
SolrRequestHandler handler = core.getRequestHandler(path);
SearchHandler sh = (SearchHandler) handler;
List<SearchComponent> components = sh.getComponents();
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
QueryComponent qc = (QueryComponent) components.get(0);
qc.prepare(rb);
qc.process(rb);
qc.finishStage(rb);
int hits = rb.getResults().docList.matches();
if (req != null) req.close();
core.close();
SolrRequestInfo.clearRequestInfo();
Thread.currentThread().setName(threadname);
return hits;
} catch (final Throwable e) {
log.warn(e);
Thread.currentThread().setName(threadname);
return 0;
}
}
Thread.currentThread().setName(threadname);
return getSize0();
}
public long getSize0() {
/*
if (this.server instanceof EmbeddedSolrServer) {
EmbeddedSolrServer ess = (EmbeddedSolrServer) this.server;
CoreContainer coreContainer = ess.getCoreContainer();
String coreName = coreContainer.getDefaultCoreName();
SolrCore core = coreContainer.getCore(coreName);
}
*/
try {
final QueryResponse rsp = query(AbstractSolrConnector.catchSuccessQuery);
if (rsp == null) return 0;

View File

@ -0,0 +1,135 @@
/**
* SolrEmbeddedInstance
* Copyright 2013 by Michael Peter Christen
* First released 13.02.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.instance;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import com.google.common.io.Files;
/**
 * A solr instance which runs inside this JVM. The constructor copies the solr
 * configuration files into the storage path, starts a {@link CoreContainer} on
 * that path and determines the default core. Server objects for the cores are
 * handed out as {@link EmbeddedSolrServer}.
 */
public class SolrEmbeddedInstance implements SolrInstance {

    /** configuration files and directories which are copied from the solr_config directory into the conf directory of the core path */
    private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "xslt/example.xsl", "xslt/json.xsl", "lang/"};

    private final CoreContainer cores;
    private String defaultCoreName;
    private SolrCore defaultCore;
    private final SolrServer defaultServer;
    private final File storagePath;

    /**
     * Start an embedded solr instance.
     * @param corePath the storage path of the instance; the configuration is copied into its "conf" sub-directory
     * @param solr_config the directory where the configuration files and solr.xml are taken from
     * @throws IOException if the default core cannot be obtained
     */
    public SolrEmbeddedInstance(final File corePath, final File solr_config) throws IOException {
        super();
        // copy the solrconfig.xml to the storage path
        this.storagePath = corePath;
        final File conf = new File(corePath, "conf");
        conf.mkdirs();
        for (final String cf: confFiles) {
            final File source = new File(solr_config, cf);
            final File target = new File(conf, cf);
            if (source.isDirectory()) {
                target.mkdirs();
                // File.list() returns null if the directory cannot be read
                final String[] children = source.list();
                if (children == null) continue;
                for (final String cfl: children) {
                    copyConfFile(new File(source, cfl), new File(target, cfl));
                }
            } else {
                target.getParentFile().mkdirs();
                copyConfFile(source, target);
            }
        }

        final String dir = corePath.getAbsolutePath();
        final File configFile = new File(solr_config, "solr.xml");
        this.cores = new CoreContainer(dir, configFile); // this may take indefinitely long if solr files are broken
        this.defaultCoreName = this.cores.getDefaultCoreName();
        Log.logInfo("SolrEmbeddedInstance", "detected default solr core: " + this.defaultCoreName);
        this.defaultCore = this.cores.getCore(this.defaultCoreName); // should be "collection1"
        if (this.defaultCore == null) {
            // try again: fall back to the first core the container knows
            final Collection<SolrCore> available = this.cores.getCores();
            if (available.size() > 0) {
                this.defaultCore = available.iterator().next();
                this.defaultCoreName = this.defaultCore.getName();
            }
        }
        if (this.defaultCore == null) {
            throw new IOException("cannot get the default core; available = " + MemoryControl.available() + ", free = " + MemoryControl.free());
        }
        this.defaultServer = new EmbeddedSolrServer(this.cores, this.defaultCoreName);
    }

    /**
     * Copy a single configuration file. A failure is logged and does not abort the
     * start-up, so that the remaining files are still copied.
     */
    private static void copyConfFile(final File source, final File target) {
        try {
            Files.copy(source, target);
        } catch (IOException e) {
            Log.logException(e);
        }
    }

    /**
     * @return the core path which was given to the constructor
     */
    public File getStoragePath() {
        return this.storagePath;
    }

    @Override
    public String getDefaultCoreName() {
        return this.defaultCoreName;
    }

    @Override
    public Collection<String> getCoreNames() {
        return this.cores.getCoreNames();
    }

    @Override
    public SolrServer getDefaultServer() {
        return this.defaultServer;
    }

    /**
     * Create a server object for a core of this instance; every call creates a new object.
     * @param name the name of the core
     */
    @Override
    public SolrServer getServer(String name) {
        return new EmbeddedSolrServer(this.cores, name);
    }

    /**
     * @return the core which was selected as default core in the constructor
     */
    public SolrCore getDefaultCore() {
        return this.defaultCore;
    }

    /**
     * Get a core by name from the core container.
     * NOTE(review): CoreContainer.getCore presumably increments the reference count of the
     * core, so callers would have to close() the returned core - confirm against the solr version in use.
     * @param name the name of the core
     * @return the core as returned by the core container
     */
    public SolrCore getCore(String name) {
        return this.cores.getCore(name);
    }

    /**
     * Shut down the core container; a failure is logged and not propagated.
     */
    @Override
    public synchronized void close() {
        try {this.cores.shutdown();} catch (Throwable e) {Log.logException(e);}
    }

}

View File

@ -0,0 +1,38 @@
/**
* SolrInstance
* Copyright 2013 by Michael Peter Christen
* First released 13.02.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.instance;
import java.util.Collection;
import org.apache.solr.client.solrj.SolrServer;
/**
 * A connection to a solr server which may host several cores.
 * Server objects for the single cores are requested from an instance by core name.
 */
public interface SolrInstance {

    /**
     * @return the name of the default core of this instance
     */
    String getDefaultCoreName();

    /**
     * @return the names of the cores of this instance
     */
    Collection<String> getCoreNames();

    /**
     * @return the server object for the default core
     */
    SolrServer getDefaultServer();

    /**
     * @param name the name of a core
     * @return a server object for the core with the given name
     */
    SolrServer getServer(String name);

    /**
     * Close this instance.
     */
    void close();

}

View File

@ -0,0 +1,226 @@
/**
* SolrRemoteInstance
* Copyright 2013 by Michael Peter Christen
* First released 13.02.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.instance;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.kelondro.logging.Log;
import org.apache.commons.httpclient.HttpException;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.AuthCache;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicAuthCache;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
public class SolrRemoteInstance implements SolrInstance {
private String solrurl;
private DefaultHttpClient client;
private String defaultCoreName;
private HttpSolrServer defaultServer;
private Collection<String> coreNames;
public SolrRemoteInstance(final String url) throws IOException {
this(url, null, url.endsWith("solr/") || url.endsWith("solr") ? "solr" : "shard0");
}
public SolrRemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName) throws IOException {
this.solrurl = url;
if (this.solrurl == null) this.solrurl = "http://127.0.0.1:8983/solr/"; // that should work for the example configuration of solr 4.x.x
this.coreNames = coreNames == null ? new ArrayList<String>() : coreNames;
this.defaultCoreName = defaultCoreName;
if (this.defaultCoreName == null) this.defaultCoreName = "shard0";
if (!this.coreNames.contains(this.defaultCoreName)) this.coreNames.add(this.defaultCoreName);
// check the url
if (this.solrurl.endsWith("/")) {
// this could mean that we have a path without a core name (correct)
// or that the core name is appended and contains a badly '/' at the end (must be corrected)
if (this.solrurl.endsWith(this.defaultCoreName + "/")) {
this.solrurl = this.solrurl.substring(0, this.solrurl.length() - this.defaultCoreName.length() - 1);
}
} else {
// this could mean that we have an url which ends with the core name (must be corrected)
// or that the url has a mising '/' (must be corrected)
if (this.solrurl.endsWith(this.defaultCoreName)) {
this.solrurl = this.solrurl.substring(0, this.solrurl.length() - this.defaultCoreName.length());
} else {
this.solrurl = this.solrurl + "/";
}
}
// Make a http client, connect using authentication. An url like
// http://127.0.0.1:8983/solr/shard0
// is proper, and contains the core name as last element in the path
final MultiProtocolURI u;
try {
u = new MultiProtocolURI(this.solrurl + this.defaultCoreName);
} catch (MalformedURLException e) {
throw new IOException(e.getMessage());
}
String solraccount, solrpw;
String host = u.getHost();
final String userinfo = u.getUserInfo();
if (userinfo == null || userinfo.isEmpty()) {
solraccount = ""; solrpw = "";
} else {
final int p = userinfo.indexOf(':');
if (p < 0) {
solraccount = userinfo; solrpw = "";
} else {
solraccount = userinfo.substring(0, p); solrpw = userinfo.substring(p + 1);
}
}
if (solraccount.length() > 0) {
PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); // try also: ThreadSafeClientConnManager
cm.setMaxTotal(100);
this.client = new DefaultHttpClient(cm) {
@Override
protected HttpContext createHttpContext() {
HttpContext context = super.createHttpContext();
AuthCache authCache = new BasicAuthCache();
BasicScheme basicAuth = new BasicScheme();
HttpHost targetHost = new HttpHost(u.getHost(), u.getPort(), u.getProtocol());
authCache.put(targetHost, basicAuth);
context.setAttribute(ClientContext.AUTH_CACHE, authCache);
return context;
}
};
this.client.addRequestInterceptor(new HttpRequestInterceptor() {
@Override
public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
if (!request.containsHeader("Accept-Encoding")) request.addHeader("Accept-Encoding", "gzip");
}
});
this.client.addResponseInterceptor(new HttpResponseInterceptor() {
@Override
public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
HttpEntity entity = response.getEntity();
if (entity != null) {
Header ceheader = entity.getContentEncoding();
if (ceheader != null) {
HeaderElement[] codecs = ceheader.getElements();
for (HeaderElement codec : codecs) {
if (codec.getName().equalsIgnoreCase("gzip")) {
response.setEntity(new GzipDecompressingEntity(response.getEntity()));
return;
}
}
}
}
}
});
BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
credsProvider.setCredentials(new AuthScope(host, AuthScope.ANY_PORT), new UsernamePasswordCredentials(solraccount, solrpw));
this.client.setCredentialsProvider(credsProvider);
} else {
this.client = null;
}
this.defaultServer = (HttpSolrServer) getServer(this.defaultCoreName);
if (this.defaultServer == null) throw new IOException("cannot connect to url " + url + " and connect core " + defaultCoreName);
}
public String getAdminInterface() {
final InetAddress localhostExternAddress = Domains.myPublicLocalIP();
final String localhostExtern = localhostExternAddress == null ? "127.0.0.1" : localhostExternAddress.getHostAddress();
String u = this.solrurl;
int p = u.indexOf("localhost",0);
if (p < 0) p = u.indexOf("127.0.0.1",0);
if (p < 0) p = u.indexOf("0:0:0:0:0:0:0:1",0);
if (p >= 0) u = u.substring(0, p) + localhostExtern + u.substring(p + 9);
return u + (u.endsWith("/") ? "admin/" : "/admin/");
}
@Override
public String getDefaultCoreName() {
return this.defaultCoreName;
}
@Override
public Collection<String> getCoreNames() {
return this.coreNames;
}
@Override
public SolrServer getDefaultServer() {
return this.defaultServer;
}
@Override
public SolrServer getServer(String name) {
HttpSolrServer server;
if (this.client != null) {
final MultiProtocolURI u;
try {
u = new MultiProtocolURI(this.solrurl + name);
} catch (MalformedURLException e) {
return null;
}
String host = u.getHost();
int port = u.getPort();
String solrpath = u.getPath();
String p = "http://" + host + ":" + port + solrpath;
Log.logInfo("RemoteSolrConnector", "connecting Solr authenticated with url:" + p);
server = new HttpSolrServer(p, client);
} else {
Log.logInfo("RemoteSolrConnector", "connecting Solr with url:" + this.solrurl + name);
server = new HttpSolrServer(this.solrurl + name);
}
server.setAllowCompression(true);
server.setConnectionTimeout(60000);
server.setMaxRetries(1); // Solr-Doc: No more than 1 recommended (depreciated)
server.setSoTimeout(60000);
return server;
}
@Override
public void close() {
if (this.client != null) this.client.getConnectionManager().shutdown();
}
}

View File

@ -84,8 +84,8 @@ public class CrawlQueues {
this.log.logConfig("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
this.errorURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrSchema(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
this.delegatedURL = new ZURL(sb.index.fulltext().getSolr(), sb.index.fulltext().getSolrSchema(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
}
public void relocate(final File newQueuePath) {
@ -96,8 +96,8 @@ public class CrawlQueues {
this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
this.errorURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
this.errorURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrSchema(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
this.delegatedURL = new ZURL(this.sb.index.fulltext().getSolr(), this.sb.index.fulltext().getSolrSchema(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
}
public synchronized void close() {

View File

@ -32,7 +32,6 @@ package net.yacy.data;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

View File

@ -72,6 +72,7 @@ import net.yacy.cora.federate.opensearch.SRURSSConnector;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.RemoteSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest;
@ -105,7 +106,6 @@ import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.query.QueryModifier;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SecondarySearchSuperviser;
import net.yacy.search.snippet.TextSnippet;
@ -1059,9 +1059,12 @@ public final class Protocol {
} else {
final String solrURL = "http://" + target.getPublicAddress() + "/solr";
try {
SolrConnector solrConnector = new RemoteSolrConnector(solrURL);
SolrRemoteInstance instance = new SolrRemoteInstance(solrURL);
SolrConnector solrConnector = new RemoteSolrConnector(instance, "solr");
rsp = solrConnector.query(solrQuery);
docList = rsp.getResults();
solrConnector.close();
instance.close();
// no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (Throwable e) {
Network.log.logInfo("SEARCH failed (solr), remote Peer: " +target.getName() + "/" + target.getPublicAddress() + " (" + e.getMessage() + ")", e);
@ -1148,7 +1151,7 @@ public final class Protocol {
// passed all checks, store url
if (!localsearch) {
try {
event.query.getSegment().fulltext().putDocument(YaCySchema.toSolrInputDocument(doc));
event.query.getSegment().fulltext().putDocument(event.query.getSegment().fulltext().getSolrSchema().toSolrInputDocument(doc));
ResultURLs.stack(
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),

View File

@ -103,6 +103,7 @@ import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.ShardSelection;
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.instance.SolrRemoteInstance;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.federate.yacy.ConfigurationSet;
import net.yacy.cora.lod.JenaTripleStore;
@ -413,9 +414,9 @@ public final class Switchboard extends serverSwitch {
for (YaCySchema field: new YaCySchema[]{
YaCySchema.host_s, YaCySchema.load_date_dt,
YaCySchema.url_file_ext_s, YaCySchema.last_modified, // needed for media search and /date operator
YaCySchema.url_paths_sxt, YaCySchema.host_organization_s, // needed to search in the url
YaCySchema.inboundlinks_protocol_sxt, YaCySchema.inboundlinks_urlstub_txt, // needed for HostBrowser
YaCySchema.outboundlinks_protocol_sxt, YaCySchema.outboundlinks_urlstub_txt // needed to enhance the crawler
/*YaCySchema.url_paths_sxt,*/ YaCySchema.host_organization_s, // needed to search in the url
/*YaCySchema.inboundlinks_protocol_sxt,*/ YaCySchema.inboundlinks_urlstub_txt, // needed for HostBrowser
/*YaCySchema.outboundlinks_protocol_sxt,*/ YaCySchema.outboundlinks_urlstub_txt // needed to enhance the crawler
}) {
ConfigurationSet.Entry entry = solrScheme.get(field.name()); entry.setEnable(true); solrScheme.put(field.name(), entry);
}
@ -441,10 +442,8 @@ public final class Switchboard extends serverSwitch {
if (usesolr && solrurls != null && solrurls.length() > 0) {
try {
SolrConnector solr = new ShardSolrConnector(
solrurls,
ShardSelection.Method.MODULO_HOST_MD5,
10000, true);
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
this.index.fulltext().connectRemoteSolr(solr);
} catch ( final IOException e ) {
Log.logException(e);
@ -1231,7 +1230,7 @@ public final class Switchboard extends serverSwitch {
synchronized ( this ) {
// remember the solr scheme
SolrConfiguration solrScheme = this.index.fulltext().getSolrScheme();
SolrConfiguration solrScheme = this.index.fulltext().getSolrSchema();
// shut down
this.crawler.close();
@ -1293,10 +1292,8 @@ public final class Switchboard extends serverSwitch {
if (usesolr && solrurls != null && solrurls.length() > 0) {
try {
SolrConnector solr = new ShardSolrConnector(
solrurls,
ShardSelection.Method.MODULO_HOST_MD5,
10000, true);
ArrayList<SolrRemoteInstance> instances = ShardSolrConnector.getShardInstances(solrurls);
ShardSolrConnector solr = new ShardSolrConnector(instances, ShardSelection.Method.MODULO_HOST_MD5, true);
this.index.fulltext().connectRemoteSolr(solr);
} catch ( final IOException e ) {
Log.logException(e);
@ -2220,7 +2217,7 @@ public final class Switchboard extends serverSwitch {
// if no crawl is running and processing is activated:
// execute the (post-) processing steps for all entries that have a process tag assigned
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) {
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrSchema().contains(YaCySchema.process_sxt)) {
// that means we must search for those entries.
index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
@ -2240,7 +2237,7 @@ public final class Switchboard extends serverSwitch {
// switch over tag types
if (tagtype == ProcessType.CLICKDEPTH) {
//proctags.remove(tag);
if (index.fulltext().getSolrScheme().contains(YaCySchema.clickdepth_i)) {
if (index.fulltext().getSolrSchema().contains(YaCySchema.clickdepth_i)) {
DigestURI url;
try {
// get new click depth and compare with old
@ -2248,11 +2245,11 @@ public final class Switchboard extends serverSwitch {
url = new DigestURI((String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
int clickdepth = SolrConfiguration.getClickDepth(index.urlCitation(), url);
if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++;
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
SolrInputDocument sid = index.fulltext().getSolrSchema().toSolrInputDocument(doc);
sid.setField(YaCySchema.clickdepth_i.getSolrFieldName(), clickdepth);
// refresh the link count; it's 'cheap' to do this here
if (index.fulltext().getSolrScheme().contains(YaCySchema.references_i)) {
if (index.fulltext().getSolrSchema().contains(YaCySchema.references_i)) {
Integer oldreferences = (Integer) doc.getFieldValue(YaCySchema.references_i.getSolrFieldName());
int references = index.urlCitation().count(url.hash());
if (references > 0) {

View File

@ -61,9 +61,9 @@ public class DocumentIndex extends Segment {
static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
public DocumentIndex(final File segmentPath, final File schemePath, final CallbackListener callback, final int cachesize)
public DocumentIndex(final File segmentPath, final File schemaPath, final CallbackListener callback, final int cachesize)
throws IOException {
super(new Log("DocumentIndex"), segmentPath, schemePath == null ? null : new SolrConfiguration(schemePath, true));
super(new Log("DocumentIndex"), segmentPath, schemaPath == null ? null : new SolrConfiguration(schemaPath, true));
super.connectRWI(cachesize, targetFileSize * 4 - 1);
super.connectCitation(cachesize, targetFileSize * 4 - 1);
super.connectUrlDb(

View File

@ -45,7 +45,9 @@ import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.AbstractSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
import net.yacy.cora.federate.solr.connector.ShardSolrConnector;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.solr.instance.SolrEmbeddedInstance;
import net.yacy.cora.order.CloneableIterator;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.sorting.ScoreMap;
@ -82,16 +84,16 @@ public final class Fulltext {
private String tablename;
private ArrayList<HostStat> statsDump;
private final MirrorSolrConnector solr;
private final SolrConfiguration solrScheme;
private final SolrConfiguration solrSchema;
protected Fulltext(final File path, final SolrConfiguration solrScheme) {
protected Fulltext(final File path, final SolrConfiguration solrSchema) {
this.location = path;
this.tablename = null;
this.urlIndexFile = null;
this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null;
this.solr = new MirrorSolrConnector(10000, 10000, 100);
this.solrScheme = solrScheme;
this.solrSchema = solrSchema;
}
/**
@ -134,8 +136,8 @@ public final class Fulltext {
this.urlIndexFile = null;
}
public SolrConfiguration getSolrScheme() {
return this.solrScheme;
public SolrConfiguration getSolrSchema() {
return this.solrSchema;
}
public boolean connectedLocalSolr() {
@ -151,7 +153,8 @@ public final class Fulltext {
File oldLocation = new File(baseLocation, oldVersion);
if (oldLocation.exists()) oldLocation.renameTo(solrLocation);
}
EmbeddedSolrConnector esc = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
SolrEmbeddedInstance instance = new SolrEmbeddedInstance(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
EmbeddedSolrConnector esc = new EmbeddedSolrConnector(instance);
Version luceneVersion = esc.getConfig().getLuceneVersion("luceneMatchVersion");
String lvn = luceneVersion.name();
Log.logInfo("Fulltext", "using lucene version " + lvn);
@ -169,7 +172,7 @@ public final class Fulltext {
return this.solr.isConnected1();
}
public void connectRemoteSolr(final SolrConnector rs) {
public void connectRemoteSolr(final ShardSolrConnector rs) {
this.solr.connect1(rs);
}
@ -291,7 +294,7 @@ public final class Fulltext {
final Row.Entry entry = this.urlIndexFile.remove(urlHash);
if (entry == null) return null;
URIMetadataRow row = new URIMetadataRow(entry, wre);
SolrInputDocument solrInput = this.solrScheme.metadata2solr(row);
SolrInputDocument solrInput = this.solrSchema.metadata2solr(row);
this.putDocument(solrInput);
return new URIMetadataNode(solrInput, wre, weight);
} catch (final IOException e) {
@ -309,7 +312,7 @@ public final class Fulltext {
Date sdDate = (Date) this.solr.getFieldById(id, YaCySchema.last_modified.getSolrFieldName());
Date docDate = null;
if (sdDate == null || (docDate = SolrConfiguration.getDate(doc, YaCySchema.last_modified)) == null || sdDate.before(docDate)) {
if (this.solrScheme.contains(YaCySchema.ip_s)) {
if (this.solrSchema.contains(YaCySchema.ip_s)) {
// ip_s needs a dns lookup which causes blockings during search here
this.solr.add(doc);
} else synchronized (this.solr) {
@ -332,11 +335,11 @@ public final class Fulltext {
if (this.urlIndexFile != null) this.urlIndexFile.remove(idb);
SolrDocument sd = this.solr.getById(id);
if (sd == null || (new URIMetadataNode(sd)).isOlder(row)) {
if (this.solrScheme.contains(YaCySchema.ip_s)) {
if (this.solrSchema.contains(YaCySchema.ip_s)) {
// ip_s needs a dns lookup which causes blockings during search here
this.solr.add(getSolrScheme().metadata2solr(row));
this.solr.add(getSolrSchema().metadata2solr(row));
} else synchronized (this.solr) {
this.solr.add(getSolrScheme().metadata2solr(row));
this.solr.add(getSolrSchema().metadata2solr(row));
}
}
} catch (SolrException e) {
@ -544,11 +547,12 @@ public final class Fulltext {
Log.logWarning("Fulltext", "HOT DUMP selected solr0 == NULL, no dump list!");
return zips;
}
if (esc.getStoragePath() == null) {
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
if (sei.getStoragePath() == null) {
Log.logWarning("Fulltext", "HOT DUMP selected solr0.getStoragePath() == NULL, no dump list!");
return zips;
}
File storagePath = esc.getStoragePath().getParentFile();
File storagePath = sei.getStoragePath().getParentFile();
if (storagePath == null) {
Log.logWarning("Fulltext", "HOT DUMP selected esc.getStoragePath().getParentFile() == NULL, no dump list!");
return zips;
@ -566,7 +570,8 @@ public final class Fulltext {
*/
public File dumpSolr() {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
File storagePath = esc.getStoragePath();
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
File storagePath = sei.getStoragePath();
File zipOut = new File(storagePath.toString() + "_" + GenericFormatter.SHORT_DAY_FORMATTER.format() + ".zip");
synchronized (this.solr) {
this.disconnectLocalSolr();
@ -591,7 +596,8 @@ public final class Fulltext {
*/
public void restoreSolr(File solrDumpZipFile) {
EmbeddedSolrConnector esc = (EmbeddedSolrConnector) this.solr.getSolr0();
File storagePath = esc.getStoragePath();
SolrEmbeddedInstance sei = (SolrEmbeddedInstance) esc.getInstance();
File storagePath = sei.getStoragePath();
synchronized (this.solr) {
this.disconnectLocalSolr();
try {

View File

@ -112,13 +112,13 @@ public class Segment {
protected IndexCell<WordReference> termIndex;
protected IndexCell<CitationReference> urlCitationIndex;
public Segment(final Log log, final File segmentPath, final SolrConfiguration solrScheme) {
public Segment(final Log log, final File segmentPath, final SolrConfiguration solrSchema) {
log.logInfo("Initializing Segment '" + segmentPath + ".");
this.log = log;
this.segmentPath = segmentPath;
// create LURL-db
this.fulltext = new Fulltext(segmentPath, solrScheme);
this.fulltext = new Fulltext(segmentPath, solrSchema);
}
public boolean connectedRWI() {
@ -371,10 +371,10 @@ public class Segment {
this.fulltext.getSolr().commit(false);
} else {
if (
(this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.exact_signature_unique_b)) ||
(this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrScheme().contains(YaCySchema.fuzzy_signature_unique_b)) ||
this.fulltext.getSolrScheme().contains(YaCySchema.title_unique_b) ||
this.fulltext.getSolrScheme().contains(YaCySchema.description_unique_b)
(this.fulltext.getSolrSchema().contains(YaCySchema.exact_signature_l) && this.fulltext.getSolrSchema().contains(YaCySchema.exact_signature_unique_b)) ||
(this.fulltext.getSolrSchema().contains(YaCySchema.fuzzy_signature_l) && this.fulltext.getSolrSchema().contains(YaCySchema.fuzzy_signature_unique_b)) ||
this.fulltext.getSolrSchema().contains(YaCySchema.title_unique_b) ||
this.fulltext.getSolrSchema().contains(YaCySchema.description_unique_b)
) {
this.fulltext.getSolr().commit(true); // make sure that we have latest information for the postprocessing steps
}
@ -395,7 +395,7 @@ public class Segment {
char docType = Response.docType(document.dc_format());
// CREATE SOLR DOCUMENT
final SolrInputDocument solrInputDoc = this.fulltext.getSolrScheme().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex);
final SolrInputDocument solrInputDoc = this.fulltext.getSolrSchema().yacy2solr(id, profile, responseHeader, document, condenser, referrerURL, language, urlCitationIndex);
// FIND OUT IF THIS IS A DOUBLE DOCUMENT
for (YaCySchema[] checkfields: new YaCySchema[][]{
@ -403,7 +403,7 @@ public class Segment {
{YaCySchema.fuzzy_signature_l, YaCySchema.fuzzy_signature_unique_b}}) {
YaCySchema checkfield = checkfields[0];
YaCySchema uniquefield = checkfields[1];
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
if (this.fulltext.getSolrSchema().contains(checkfield) && this.fulltext.getSolrSchema().contains(uniquefield)) {
// lookup the document with the same signature
long signature = ((Long) solrInputDoc.getField(checkfield.getSolrFieldName()).getValue()).longValue();
try {
@ -421,7 +421,7 @@ public class Segment {
{YaCySchema.description, YaCySchema.description_unique_b}}) {
YaCySchema checkfield = checkfields[0];
YaCySchema uniquefield = checkfields[1];
if (this.fulltext.getSolrScheme().contains(checkfield) && this.fulltext.getSolrScheme().contains(uniquefield)) {
if (this.fulltext.getSolrSchema().contains(checkfield) && this.fulltext.getSolrSchema().contains(uniquefield)) {
// lookup in the index for the same title
String checkstring = checkfield == YaCySchema.title ? document.dc_title() : document.dc_description();
if (checkstring.length() == 0) {
@ -436,7 +436,7 @@ public class Segment {
// switch attribute also in all existing documents (which should be exactly only one!)
SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000);
for (SolrDocument doc: docs) {
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
SolrInputDocument sid = this.fulltext.getSolrSchema().toSolrInputDocument(doc);
sid.setField(uniquefield.getSolrFieldName(), false);
this.fulltext.getSolr().add(sid);
}
@ -448,7 +448,7 @@ public class Segment {
}
// ENRICH DOCUMENT WITH RANKING INFORMATION
if (this.urlCitationIndex != null && this.fulltext.getSolrScheme().contains(YaCySchema.references_i)) {
if (this.urlCitationIndex != null && this.fulltext.getSolrSchema().contains(YaCySchema.references_i)) {
int references = this.urlCitationIndex.count(url.hash());
if (references > 0) solrInputDoc.setField(YaCySchema.references_i.getSolrFieldName(), references);
}

View File

@ -1,5 +1,5 @@
/**
* SolrScheme
* SolrConfiguration
* Copyright 2011 by Michael Peter Christen
* First released 14.04.2011 at http://yacy.net
*
@ -33,6 +33,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
@ -68,6 +69,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.Bitfield;
import net.yacy.kelondro.util.ByteBuffer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
@ -87,7 +89,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
/**
* initialize the scheme with a given configuration file
* initialize the schema with a given configuration file
* the configuration file simply contains a list of lines with keywords
* or keyword = value lines (while value is a custom Solr field name
* @param configurationFile
@ -103,18 +105,38 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
YaCySchema f = YaCySchema.valueOf(etr.key());
f.setSolrFieldName(etr.getValue());
} catch (IllegalArgumentException e) {
Log.logFine("SolrScheme", "solr scheme file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
Log.logFine("SolrSchema", "solr schema file " + configurationFile.getAbsolutePath() + " defines unknown attribute '" + etr.toString() + "'");
it.remove();
}
}
// check consistency the other way: look if all enum constants in SolrField appear in the configuration file
for (YaCySchema field: YaCySchema.values()) {
if (this.get(field.name()) == null) {
Log.logWarning("SolrScheme", " solr scheme file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
Log.logWarning("SolrSchema", " solr schema file " + configurationFile.getAbsolutePath() + " is missing declaration for '" + field.name() + "'");
}
}
}
/**
 * Turns a document that was read from the index into a document that can be indexed again,
 * e.g. after some of its fields have been modified.
 * Use this in place of ClientUtils.toSolrInputDocument: only fields that are contained in
 * this configuration are copied, and fields which are created automatically during the
 * indexing process are left out.
 * @param doc the solr document as retrieved from the index
 * @return a solr input document carrying the copied fields, each added with boost 1.0
 */
public SolrInputDocument toSolrInputDocument(SolrDocument doc) {
    // names of the fields that must not be copied
    final String coordinateField = YaCySchema.coordinate_p.getSolrFieldName();
    final Set<String> skipped = new HashSet<String>();
    skipped.add(coordinateField + "_0_coordinate");
    skipped.add(coordinateField + "_1_coordinate");
    skipped.add(YaCySchema.author_sxt.getSolrFieldName());

    final SolrInputDocument result = new SolrInputDocument();
    for (final String fieldName : doc.getFieldNames()) {
        if (!this.contains(fieldName)) continue;
        if (skipped.contains(fieldName)) continue;
        result.addField(fieldName, doc.getFieldValue(fieldName), 1.0f);
    }
    return result;
}
/**
 * Checks a schema field by delegating to the name-based lookup with the enum constant's name.
 * @param field the schema field to look for
 * @return the result of {@code contains(field.name())}
 */
public boolean contains(YaCySchema field) {
    final String key = field.name();
    return this.contains(key);
}
@ -318,7 +340,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final String id, final CrawlProfile profile, final ResponseHeader responseHeader,
final Document document, Condenser condenser, DigestURI referrerURL, String language,
IndexCell<CitationReference> citations) {
// we use the SolrCell design as index scheme
// we use the SolrCell design as index schema
final SolrInputDocument doc = new SolrInputDocument();
final DigestURI digestURI = DigestURI.toDigestURI(document.dc_source());
boolean allAttr = this.isEmpty();
@ -445,7 +467,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
if (allAttr || contains(YaCySchema.url_paths_sxt)) add(doc, YaCySchema.url_paths_sxt, digestURI.getPaths());
if (allAttr || contains(YaCySchema.url_file_ext_s)) add(doc, YaCySchema.url_file_ext_s, digestURI.getFileExtension());
// get list of all links; they will be shrinked by urls that appear in other fields of the solr scheme
// get list of all links; they will be shrinked by urls that appear in other fields of the solr schema
Set<MultiProtocolURI> inboundLinks = document.inboundLinks();
Set<MultiProtocolURI> outboundLinks = document.outboundLinks();

View File

@ -128,7 +128,7 @@ public final class QueryParams {
public List<String> facetfields;
public int maxfacets;
private SolrQuery cachedQuery;
private SolrConfiguration solrScheme;
private SolrConfiguration solrSchema;
// the following values are filled during the search process as statistics for the search
public final AtomicInteger local_rwi_available; // the number of hits generated/ranked by the local search in rwi index
@ -191,9 +191,9 @@ public final class QueryParams {
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
this.facetfields = new ArrayList<String>();
this.solrScheme = indexSegment.fulltext().getSolrScheme();
this.solrSchema = indexSegment.fulltext().getSolrSchema();
for (YaCySchema f: defaultfacetfields) {
if (solrScheme.contains(f)) facetfields.add(f.getSolrFieldName());
if (solrSchema.contains(f)) facetfields.add(f.getSolrFieldName());
}
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
@ -300,9 +300,9 @@ public final class QueryParams {
this.misses = Collections.synchronizedSortedSet(new TreeSet<byte[]>(URIMetadataRow.rowdef.objectOrder));
this.facetfields = new ArrayList<String>();
this.solrScheme = indexSegment.fulltext().getSolrScheme();
this.solrSchema = indexSegment.fulltext().getSolrSchema();
for (YaCySchema f: defaultfacetfields) {
if (solrScheme.contains(f)) facetfields.add(f.getSolrFieldName());
if (solrSchema.contains(f)) facetfields.add(f.getSolrFieldName());
}
for (Tagging v: LibraryProvider.autotagging.getVocabularies()) this.facetfields.add(YaCySchema.VOCABULARY_PREFIX + v.getName() + YaCySchema.VOCABULARY_SUFFIX);
this.maxfacets = defaultmaxfacets;
@ -432,7 +432,7 @@ public final class QueryParams {
if (this.queryGoal.getIncludeStrings().size() == 0) return null;
// construct query
final SolrQuery params = new SolrQuery();
params.setQuery(this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrScheme()).toString());
params.setQuery(this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrSchema()).toString());
params.setParam("defType", "edismax");
params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
@ -468,7 +468,7 @@ public final class QueryParams {
}
// add author facets
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrScheme.contains(YaCySchema.author_sxt)) {
if (this.modifier.author != null && this.modifier.author.length() > 0 && this.solrSchema.contains(YaCySchema.author_sxt)) {
fq.append(" AND ").append(YaCySchema.author_sxt.getSolrFieldName()).append(":\"").append(this.modifier.author).append('\"');
}

View File

@ -648,7 +648,7 @@ public final class SearchEvent {
int loops = 0; // a loop counter to terminate the reading if all the results are from the same domain
// wait some time if we did not get so much remote results so far to get a better ranking over remote results
// we wait at most 30 milliseconds to get a maximum total waiting time of 300 milliseconds for 10 results
long wait = waitTimeRecommendation();
long wait = Math.min(waitingtime, waitTimeRecommendation());
if ( wait > 0 ) {
//System.out.println("*** RWIProcess extra wait: " + wait + "ms; expectedRemoteReferences = " + this.expectedRemoteReferences.get() + ", receivedRemoteReferences = " + this.receivedRemoteReferences.get() + ", initialExpectedRemoteReferences = " + this.maxExpectedRemoteReferences.get());
Thread.sleep(wait);