mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
b6de1f42dc
multiple solr cores instead of just one. Therefore it is now necessary to distinguish between solr server connections (called an 'Instance') and a connection to a single solr core. One Instance may now have multiple connector classes assigned to it, each connecting to a single core. To support multiple cores it is also necessary to distinguish between the connection configuration and the configuration of the index schema. We will have multiple schema configurations in the future, one for each solr core. As a consequence, the IndexFederated servlet had to be split into two parts; the new servlet for the schema editor is now the IndexSchema servlet.
213 lines
9.2 KiB
Java
213 lines
9.2 KiB
Java
/**
|
|
* search
|
|
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
|
* First released 14.08.2012 at http://yacy.net
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program in the file lgpl21.txt
|
|
* If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
import java.io.IOException;
|
|
import java.io.OutputStream;
|
|
import java.io.OutputStreamWriter;
|
|
import java.io.Writer;
|
|
import java.util.ArrayList;
|
|
import java.util.Map;
|
|
|
|
import net.yacy.cora.document.UTF8;
|
|
import net.yacy.cora.federate.solr.Boost;
|
|
import net.yacy.cora.federate.solr.YaCySchema;
|
|
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
|
|
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
|
|
import net.yacy.cora.protocol.HeaderFramework;
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
import net.yacy.cora.util.CommonPattern;
|
|
import net.yacy.kelondro.logging.Log;
|
|
import net.yacy.search.Switchboard;
|
|
import net.yacy.search.SwitchboardConstants;
|
|
import net.yacy.search.query.AccessTracker;
|
|
import net.yacy.search.query.QueryGoal;
|
|
import net.yacy.search.query.SearchEvent;
|
|
import net.yacy.server.serverObjects;
|
|
import net.yacy.server.serverSwitch;
|
|
|
|
import org.apache.solr.common.SolrException;
|
|
import org.apache.solr.common.params.CommonParams;
|
|
import org.apache.solr.request.SolrQueryRequest;
|
|
import org.apache.solr.response.ResultContext;
|
|
import org.apache.solr.response.SolrQueryResponse;
|
|
import org.apache.solr.util.FastWriter;
|
|
|
|
|
|
// try
|
|
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
|
|
|
|
/**
|
|
* This is a gsa result formatter for solr search results.
|
|
* The result format is implemented according to
|
|
* https://developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
|
|
*/
|
|
public class searchresult {
|
|
|
|
private final static GSAResponseWriter responseWriter = new GSAResponseWriter();
|
|
|
|
/**
|
|
* get the right mime type for this streamed result page
|
|
* @param header
|
|
* @param post
|
|
* @param env
|
|
* @return
|
|
*/
|
|
public static String mime(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
|
return "text/xml";
|
|
}
|
|
|
|
|
|
/**
|
|
* @param header
|
|
* @param post
|
|
* @param env
|
|
* @param out
|
|
* @return
|
|
*/
|
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env, final OutputStream out) {
|
|
|
|
// this uses the methods in the jetty servlet environment and can be removed if jetty in implemented
|
|
Switchboard sb = (Switchboard) env;
|
|
|
|
// remember the peer contact for peer statistics
|
|
final String clientip = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
|
|
final String userAgent = header.get(HeaderFramework.USER_AGENT, "<unknown>");
|
|
sb.peers.peerActions.setUserAgent(clientip, userAgent);
|
|
|
|
// check if user is allowed to search (can be switched in /ConfigPortal.html)
|
|
boolean authenticated = sb.adminAuthenticated(header) >= 2;
|
|
final boolean searchAllowed = authenticated || sb.getConfigBool("publicSearchpage", true);
|
|
if (!searchAllowed) return null;
|
|
|
|
// check post
|
|
if (post == null) return null;
|
|
Log.logInfo("GSA Query", post.toString());
|
|
sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time
|
|
|
|
// update the boost values
|
|
Boost.RANKING.updateBoosts(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, ""));
|
|
|
|
// rename post fields according to result style
|
|
//post.put(CommonParams.Q, post.remove("q")); // same as solr
|
|
//post.put(CommonParams.START, post.remove("start")); // same as solr
|
|
//post.put(, post.remove("client"));//required, example: myfrontend
|
|
//post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
|
|
String originalQuery = post.get(CommonParams.Q, "");
|
|
post.put("originalQuery", originalQuery);
|
|
|
|
// get a solr query string
|
|
QueryGoal qg = new QueryGoal(originalQuery, originalQuery);
|
|
StringBuilder solrQ = qg.solrQueryString(sb.index.fulltext().getSolrSchema());
|
|
post.put("defType", "edismax");
|
|
post.put(CommonParams.Q, solrQ.toString());
|
|
post.put(CommonParams.ROWS, post.remove("num"));
|
|
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
|
|
post.put("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
|
|
post.put("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
|
|
post.put(CommonParams.FL,
|
|
YaCySchema.content_type.getSolrFieldName() + ',' +
|
|
YaCySchema.id.getSolrFieldName() + ',' +
|
|
YaCySchema.sku.getSolrFieldName() + ',' +
|
|
YaCySchema.title.getSolrFieldName() + ',' +
|
|
YaCySchema.description.getSolrFieldName() + ',' +
|
|
YaCySchema.load_date_dt.getSolrFieldName() + ',' +
|
|
YaCySchema.last_modified.getSolrFieldName() + ',' +
|
|
YaCySchema.size_i.getSolrFieldName());
|
|
post.put("hl", "true");
|
|
post.put("hl.q", originalQuery);
|
|
post.put("hl.fl", YaCySchema.h1_txt.getSolrFieldName() + "," + YaCySchema.h2_txt.getSolrFieldName() + "," + YaCySchema.text_t.getSolrFieldName());
|
|
post.put("hl.alternateField", YaCySchema.description.getSolrFieldName());
|
|
post.put("hl.simple.pre", "<b>");
|
|
post.put("hl.simple.post", "</b>");
|
|
post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
|
|
GSAResponseWriter.Sort sort = new GSAResponseWriter.Sort(post.get(CommonParams.SORT, ""));
|
|
String sorts = sort.toSolr();
|
|
if (sorts == null) {
|
|
post.remove(CommonParams.SORT);
|
|
} else {
|
|
post.put(CommonParams.SORT, sorts);
|
|
}
|
|
String[] site = post.remove("site"); // example: col1|col2
|
|
String[] access = post.remove("access");
|
|
String[] entqr = post.remove("entqr");
|
|
|
|
// add sites operator
|
|
if (site != null && site[0].length() > 0) {
|
|
String[] s0 = CommonPattern.VERTICALBAR.split(site[0]);
|
|
ArrayList<String> sites = new ArrayList<String>(2);
|
|
for (String s: s0) {
|
|
s = s.trim().toLowerCase();
|
|
if (s.length() > 0) sites.add(s);
|
|
}
|
|
StringBuilder fq = new StringBuilder(20);
|
|
if (sites.size() > 1) {
|
|
fq.append(YaCySchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
|
|
for (int i = 1; i < sites.size(); i++) {
|
|
fq.append(" OR ").append(YaCySchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(i));
|
|
}
|
|
} else if (sites.size() == 1) {
|
|
fq.append(YaCySchema.collection_sxt.getSolrFieldName()).append(':').append(sites.get(0));
|
|
}
|
|
post.put(CommonParams.FQ, fq.toString());
|
|
}
|
|
|
|
// get the embedded connector
|
|
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();
|
|
if (connector == null) return null;
|
|
|
|
// do the solr request
|
|
SolrQueryRequest req = connector.request(post.toSolrParams(null));
|
|
SolrQueryResponse response = null;
|
|
Exception e = null;
|
|
try {response = connector.query(req);} catch (SolrException ee) {e = ee;}
|
|
if (response != null) e = response.getException();
|
|
if (e != null) {
|
|
Log.logException(e);
|
|
return null;
|
|
}
|
|
|
|
// set some context for the writer
|
|
Map<Object,Object> context = req.getContext();
|
|
context.put("ip", header.get("CLIENTIP", ""));
|
|
context.put("client", "vsm_frontent");
|
|
context.put("sort", sort.sort);
|
|
context.put("site", site == null ? "" : site);
|
|
context.put("access", access == null ? "p" : access[0]);
|
|
context.put("entqr", entqr == null ? "3" : entqr[0]);
|
|
|
|
// write the result directly to the output stream
|
|
Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
|
|
try {
|
|
responseWriter.write(ow, req, response);
|
|
ow.flush();
|
|
} catch (IOException e1) {
|
|
} finally {
|
|
req.close();
|
|
try {ow.close();} catch (IOException e1) {}
|
|
}
|
|
|
|
// log result
|
|
Object rv = response.getValues().get("response");
|
|
if (rv != null && rv instanceof ResultContext) {
|
|
AccessTracker.addToDump(originalQuery, Integer.toString(((ResultContext) rv).docs.matches()));
|
|
}
|
|
return null;
|
|
}
|
|
} |