yacy_search_server/htroot/gsa/searchresult.java

197 lines
8.1 KiB
Java
Raw Normal View History

/**
* search
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.08.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
2012-08-21 01:57:46 +02:00
import java.util.Map;
import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
2012-09-25 21:20:03 +02:00
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
2012-08-16 16:28:57 +02:00
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
2012-11-02 12:29:48 +01:00
import org.apache.solr.util.FastWriter;
// try
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
/**
* This is a gsa result formatter for solr search results.
* The result format is implemented according to
* https://developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
*/
public class searchresult {
private final static GSAResponseWriter responseWriter = new GSAResponseWriter();
/**
* get the right mime type for this streamed result page
* @param header
* @param post
* @param env
* @return
*/
public static String mime(final RequestHeader header, final serverObjects post, final serverSwitch env) {
return "text/xml";
}
2012-08-21 01:57:46 +02:00
/**
* @param header
* @param post
* @param env
* @param out
* @return
*/
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env, final OutputStream out) {
// this uses the methods in the jetty servlet environment and can be removed if jetty in implemented
Switchboard sb = (Switchboard) env;
2012-08-16 16:28:57 +02:00
// remember the peer contact for peer statistics
final String clientip = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, "<unknown>"); // read an artificial header addendum
final String userAgent = header.get(HeaderFramework.USER_AGENT, "<unknown>");
sb.peers.peerActions.setUserAgent(clientip, userAgent);
// check if user is allowed to search (can be switched in /ConfigPortal.html)
boolean authenticated = sb.adminAuthenticated(header) >= 2;
final boolean searchAllowed = authenticated || sb.getConfigBool("publicSearchpage", true);
if (!searchAllowed) return null;
// check post
if (post == null) return null;
Log.logInfo("GSA Query", post.toString());
sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time
// rename post fields according to result style
//post.put(CommonParams.Q, post.remove("q")); // same as solr
//post.put(CommonParams.START, post.remove("start")); // same as solr
//post.put(, post.remove("site"));//required, example: col1|col2
//post.put(, post.remove("client"));//required, example: myfrontend
//post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
String q = post.get(CommonParams.Q, "");
post.put("originalQuery", q);
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
post.put("hl", "true");
post.put("hl.fl", YaCySchema.h1_txt.getSolrFieldName() + "," + YaCySchema.h2_txt.getSolrFieldName() + "," + YaCySchema.text_t.getSolrFieldName());
post.put("hl.alternateField", YaCySchema.description.getSolrFieldName());
post.put("hl.simple.pre", "<b>");
post.put("hl.simple.post", "</b>");
post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
2012-08-21 02:39:28 +02:00
GSAResponseWriter.Sort sort = new GSAResponseWriter.Sort(post.get(CommonParams.SORT, ""));
2012-08-21 01:57:46 +02:00
String sorts = sort.toSolr();
if (sorts == null) {
post.remove(CommonParams.SORT);
} else {
post.put(CommonParams.SORT, sorts);
}
String site = post.remove("site");
String access = post.remove("access");
String entqr = post.remove("entqr");
// get a solr query string
Collection<String>[] cq = QueryParams.cleanQuery(q);
q = QueryParams.solrQueryString(cq[0], cq[1], sb.index.fulltext().getSolrScheme()).toString();
// add sites operator
if (site != null && site.length() > 0) {
String[] s0 = site.split(Pattern.quote("|"));
ArrayList<String> sites = new ArrayList<String>(2);
for (String s: s0) {
s = s.trim().toLowerCase();
if (s.length() > 0) sites.add(s);
}
if (sites.size() > 1) {
q += " AND (" + YaCySchema.collection_sxt.getSolrFieldName() + ":" + sites.get(0);
for (int i = 1; i < sites.size(); i++) {
q += " OR " + YaCySchema.collection_sxt.getSolrFieldName() + ":" + sites.get(i);
}
q += ")";
} else if (sites.size() == 1) {
q += " AND " + YaCySchema.collection_sxt.getSolrFieldName() + ":" + sites.get(0);
}
post.put(CommonParams.Q, q);
}
post.put(CommonParams.Q, q);
// get the embedded connector
2012-08-17 15:52:33 +02:00
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();
if (connector == null) return null;
// do the solr request
SolrQueryRequest req = connector.request(post.toSolrParams(null));
SolrQueryResponse response = null;
Exception e = null;
try {response = connector.query(req);} catch (SolrException ee) {e = ee;}
if (response != null) e = response.getException();
if (e != null) {
Log.logException(e);
return null;
}
2012-08-21 01:57:46 +02:00
// set some context for the writer
Map<Object,Object> context = req.getContext();
context.put("ip", header.get("CLIENTIP", ""));
context.put("client", "vsm_frontent");
2012-08-21 01:57:46 +02:00
context.put("sort", sort.sort);
context.put("site", site == null ? "" : site);
context.put("access", access == null ? "p" : access);
context.put("entqr", entqr == null ? "3" : entqr);
// write the result directly to the output stream
Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
try {
responseWriter.write(ow, req, response);
ow.flush();
} catch (IOException e1) {
} finally {
req.close();
try {ow.close();} catch (IOException e1) {}
}
// log result
Object rv = response.getValues().get("response");
if (rv != null && rv instanceof ResultContext) {
AccessTracker.addToDump(q, Integer.toString(((ResultContext) rv).docs.matches()));
}
return null;
}
}