2012-08-14 11:12:50 +02:00
/**
 *  select
 *  Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
 *  First released 12.08.2012 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */
2012-07-19 11:34:05 +02:00
import java.io.IOException ;
import java.io.OutputStream ;
import java.io.OutputStreamWriter ;
import java.io.Writer ;
2012-08-09 18:06:48 +02:00
import java.util.HashMap ;
import java.util.Map ;
2012-07-19 11:34:05 +02:00
import javax.servlet.ServletException ;
import net.yacy.cora.document.UTF8 ;
2013-04-12 10:48:41 +02:00
import net.yacy.cora.federate.solr.Ranking ;
2012-09-25 21:20:03 +02:00
import net.yacy.cora.federate.solr.SolrServlet ;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector ;
import net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter ;
2012-11-09 16:25:24 +01:00
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter ;
2013-06-10 18:41:00 +02:00
import net.yacy.cora.federate.solr.responsewriter.GrepHTMLResponseWriter ;
2013-06-09 12:12:34 +02:00
import net.yacy.cora.federate.solr.responsewriter.HTMLResponseWriter ;
2013-06-26 09:27:22 +02:00
import net.yacy.cora.federate.solr.responsewriter.YJsonResponseWriter ;
2012-09-25 21:20:03 +02:00
import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter ;
2012-08-16 16:28:57 +02:00
import net.yacy.cora.protocol.HeaderFramework ;
2012-07-19 11:34:05 +02:00
import net.yacy.cora.protocol.RequestHeader ;
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2012-07-19 11:34:05 +02:00
import net.yacy.search.Switchboard ;
2012-08-09 18:06:48 +02:00
import net.yacy.search.SwitchboardConstants ;
2012-08-22 23:50:40 +02:00
import net.yacy.search.query.AccessTracker ;
2013-02-25 14:31:50 +01:00
import net.yacy.search.query.QueryGoal ;
2013-02-12 03:42:46 +01:00
import net.yacy.search.query.QueryModifier ;
2012-11-01 17:40:06 +01:00
import net.yacy.search.query.SearchEvent ;
2013-02-22 15:45:15 +01:00
import net.yacy.search.schema.CollectionSchema ;
import net.yacy.search.schema.WebgraphSchema ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
2012-07-19 11:34:05 +02:00
2012-08-09 18:06:48 +02:00
import org.apache.solr.common.SolrException ;
2012-08-10 07:58:45 +02:00
import org.apache.solr.common.params.CommonParams ;
2012-09-14 12:09:20 +02:00
import org.apache.solr.common.params.SolrParams ;
2012-08-14 11:12:50 +02:00
import org.apache.solr.common.util.NamedList ;
2012-08-09 18:06:48 +02:00
import org.apache.solr.core.SolrCore ;
2012-07-19 11:34:05 +02:00
import org.apache.solr.request.SolrQueryRequest ;
2013-06-28 15:19:50 +02:00
import org.apache.solr.request.SolrRequestInfo ;
2012-07-19 11:34:05 +02:00
import org.apache.solr.response.QueryResponseWriter ;
2012-11-07 14:15:27 +01:00
import org.apache.solr.response.ResultContext ;
2012-07-19 11:34:05 +02:00
import org.apache.solr.response.SolrQueryResponse ;
2012-08-14 11:12:50 +02:00
import org.apache.solr.response.XSLTResponseWriter ;
2012-11-02 12:29:48 +01:00
import org.apache.solr.util.FastWriter ;
2012-07-19 11:34:05 +02:00
2012-08-09 18:06:48 +02:00
// try
// http://localhost:8090/solr/select?q=*:*&start=0&rows=10&indent=on
2012-08-14 11:12:50 +02:00
/**
 * This is a standard Solr search result formatter as defined in
 * http://wiki.apache.org/solr/SolrQuerySyntax
 */
2012-07-19 11:34:05 +02:00
public class select {
private static SolrServlet solrServlet = new SolrServlet ( ) ;
2012-08-09 18:06:48 +02:00
private final static Map < String , QueryResponseWriter > RESPONSE_WRITER = new HashMap < String , QueryResponseWriter > ( ) ;
2012-07-19 11:34:05 +02:00
static {
2013-07-17 18:31:30 +02:00
try { solrServlet . init ( null ) ; } catch ( final ServletException e ) { }
2012-08-09 18:06:48 +02:00
RESPONSE_WRITER . putAll ( SolrCore . DEFAULT_RESPONSE_WRITERS ) ;
2012-08-14 11:12:50 +02:00
XSLTResponseWriter xsltWriter = new XSLTResponseWriter ( ) ;
2012-11-02 12:29:48 +01:00
OpensearchResponseWriter opensearchResponseWriter = new OpensearchResponseWriter ( ) ;
2012-08-14 11:12:50 +02:00
@SuppressWarnings ( " rawtypes " )
NamedList initArgs = new NamedList ( ) ;
xsltWriter . init ( initArgs ) ;
RESPONSE_WRITER . put ( " xslt " , xsltWriter ) ; // try i.e. http://localhost:8090/solr/select?q=*:*&start=0&rows=10&wt=xslt&tr=json.xsl
2012-08-09 18:06:48 +02:00
RESPONSE_WRITER . put ( " exml " , new EnhancedXMLResponseWriter ( ) ) ;
2013-06-09 12:12:34 +02:00
RESPONSE_WRITER . put ( " html " , new HTMLResponseWriter ( ) ) ;
2013-06-10 18:41:00 +02:00
RESPONSE_WRITER . put ( " grephtml " , new GrepHTMLResponseWriter ( ) ) ;
2012-11-02 12:29:48 +01:00
RESPONSE_WRITER . put ( " rss " , opensearchResponseWriter ) ; //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER . put ( " opensearch " , opensearchResponseWriter ) ; //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
2013-06-26 09:27:22 +02:00
RESPONSE_WRITER . put ( " yjson " , new YJsonResponseWriter ( ) ) ; //try http://localhost:8090/solr/select?wt=json&q=olympia&hl=true&hl.fl=text_t,h1,h2
2012-11-09 16:25:24 +01:00
RESPONSE_WRITER . put ( " gsa " , new GSAResponseWriter ( ) ) ;
2012-07-19 11:34:05 +02:00
}
2012-08-14 11:12:50 +02:00
/ * *
* get the right mime type for this streamed result page
* @param header
* @param post
* @param env
* @return
* /
public static String mime ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
2013-06-28 13:16:25 +02:00
String wt = post = = null ? " xml " : post . get ( CommonParams . WT , " xml " ) ;
2012-08-14 11:12:50 +02:00
if ( wt = = null | | wt . length ( ) = = 0 | | " xml " . equals ( wt ) | | " exml " . equals ( wt ) ) return " text/xml " ;
if ( " xslt " . equals ( wt ) ) {
2013-06-28 13:16:25 +02:00
String tr = post = = null ? " " : post . get ( " tr " , " " ) ;
2012-08-14 11:12:50 +02:00
if ( tr . indexOf ( " json " ) > = 0 ) return " application/json " ;
}
if ( " rss " . equals ( wt ) ) return " application/rss+xml " ;
2012-09-21 21:38:50 +02:00
if ( " exml " . equals ( wt ) ) return " application/rss+xml " ;
2012-08-14 11:12:50 +02:00
if ( " json " . equals ( wt ) ) return " application/json " ;
2012-09-21 21:38:50 +02:00
if ( " yjson " . equals ( wt ) ) return " application/json " ;
2013-06-10 18:41:00 +02:00
if ( " html " . equals ( wt ) | | " grephtml " . equals ( wt ) | | " python " . equals ( wt ) ) return " text/html " ;
2012-08-14 11:12:50 +02:00
if ( " php " . equals ( wt ) | | " phps " . equals ( wt ) ) return " application/x-httpd-php " ;
if ( " ruby " . equals ( wt ) ) return " text/html " ;
if ( " raw " . equals ( wt ) ) return " application/octet-stream " ;
if ( " javabin " . equals ( wt ) ) return " application/octet-stream " ;
if ( " csv " . equals ( wt ) ) return " text/csv " ;
return " text/xml " ;
}
2012-07-19 11:34:05 +02:00
/ * *
* a query to solr , for documentation of parameters see :
* http : //lucene.apache.org/solr/api-3_6_0/doc-files/tutorial.html
* and
* http : //wiki.apache.org/solr/SolrQuerySyntax
* @param header
* @param post
* @param env
* @param out
* @return
* /
2013-06-28 13:16:25 +02:00
public static serverObjects respond ( final RequestHeader header , serverObjects post , final serverSwitch env , final OutputStream out ) {
2012-07-19 11:34:05 +02:00
// this uses the methods in the jetty servlet environment and can be removed if jetty in implemented
Switchboard sb = ( Switchboard ) env ;
2012-07-23 21:43:14 +02:00
2012-08-16 16:28:57 +02:00
// remember the peer contact for peer statistics
final String clientip = header . get ( HeaderFramework . CONNECTION_PROP_CLIENTIP , " <unknown> " ) ; // read an artificial header addendum
final String userAgent = header . get ( HeaderFramework . USER_AGENT , " <unknown> " ) ;
sb . peers . peerActions . setUserAgent ( clientip , userAgent ) ;
2012-07-23 21:43:14 +02:00
// check if user is allowed to search (can be switched in /ConfigPortal.html)
2012-08-20 17:10:48 +02:00
boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
final boolean searchAllowed = authenticated | | sb . getConfigBool ( " publicSearchpage " , true ) ;
2012-07-23 21:43:14 +02:00
if ( ! searchAllowed ) return null ;
2012-08-09 18:06:48 +02:00
// check post
2013-06-28 13:16:25 +02:00
if ( post = = null ) { post = new serverObjects ( ) ; post . put ( CommonParams . Q , " " ) ; post . put ( CommonParams . ROWS , " 0 " ) ; }
2013-05-15 22:42:05 +02:00
if ( post . size ( ) > 100 ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . warn ( " select " , " rejected bad-formed search request with " + post . size ( ) + " properties from " + header . refererHost ( ) ) ;
2013-05-15 22:42:05 +02:00
return null ; // prevent the worst hacks here...
}
2012-08-20 17:10:48 +02:00
sb . intermissionAllThreads ( 3000 ) ; // tell all threads to do nothing for a specific time
2012-12-02 16:54:29 +01:00
2013-04-16 14:45:14 +02:00
// get the ranking profile id
int profileNr = post . getInt ( " profileNr " , 0 ) ;
2012-08-09 18:06:48 +02:00
// rename post fields according to result style
2013-02-12 03:42:46 +01:00
if ( ! post . containsKey ( CommonParams . Q ) & & post . containsKey ( " query " ) ) {
String querystring = post . get ( " query " , " " ) ;
post . remove ( " query " ) ;
QueryModifier modifier = new QueryModifier ( ) ;
querystring = modifier . parse ( querystring ) ;
modifier . apply ( post ) ;
2013-02-25 14:31:50 +01:00
QueryGoal qg = new QueryGoal ( querystring , querystring ) ;
2013-04-16 14:45:14 +02:00
StringBuilder solrQ = qg . collectionQueryString ( sb . index . fulltext ( ) . getDefaultConfiguration ( ) , profileNr ) ;
2013-02-25 14:31:50 +01:00
post . put ( CommonParams . Q , solrQ . toString ( ) ) ; // sru patch
2013-02-12 03:42:46 +01:00
}
2012-08-22 23:50:40 +02:00
String q = post . get ( CommonParams . Q , " " ) ;
2013-02-25 14:31:50 +01:00
if ( ! post . containsKey ( CommonParams . START ) ) post . put ( CommonParams . START , post . remove ( " startRecord " , 0 ) ) ; // sru patch
if ( ! post . containsKey ( CommonParams . ROWS ) ) post . put ( CommonParams . ROWS , post . remove ( " maximumRecords " , 10 ) ) ; // sru patch
2013-04-23 12:15:33 +02:00
post . put ( CommonParams . ROWS , Math . min ( post . getInt ( CommonParams . ROWS , 10 ) , ( authenticated ) ? 100000000 : 100 ) ) ;
2013-04-12 10:48:41 +02:00
2013-04-16 14:45:14 +02:00
// set ranking according to profile number if ranking attributes are not given in the request
if ( ! post . containsKey ( " sort " ) & & ! post . containsKey ( " bq " ) & & ! post . containsKey ( " bf " ) & & ! post . containsKey ( " boost " ) ) {
2013-04-12 10:48:41 +02:00
if ( ! post . containsKey ( " defType " ) ) post . put ( " defType " , " edismax " ) ;
2013-04-16 14:45:14 +02:00
Ranking ranking = sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) ;
2013-04-12 10:48:41 +02:00
String bq = ranking . getBoostQuery ( ) ;
String bf = ranking . getBoostFunction ( ) ;
2013-04-16 14:45:14 +02:00
if ( bq . length ( ) > 0 ) post . put ( " bq " , bq ) ;
if ( bf . length ( ) > 0 ) post . put ( " boost " , bf ) ; // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
2013-04-12 10:48:41 +02:00
}
2012-08-14 12:40:26 +02:00
// get a response writer for the result
String wt = post . get ( CommonParams . WT , " xml " ) ; // maybe use /solr/select?q=*:*&start=0&rows=10&wt=exml
QueryResponseWriter responseWriter = RESPONSE_WRITER . get ( wt ) ;
if ( responseWriter = = null ) return null ;
if ( responseWriter instanceof OpensearchResponseWriter ) {
// set the title every time, it is possible that it has changed
final String promoteSearchPageGreeting =
( env . getConfigBool ( SwitchboardConstants . GREETING_NETWORK_NAME , false ) ) ? env . getConfig (
" network.unit.description " ,
" " ) : env . getConfig ( SwitchboardConstants . GREETING , " " ) ;
( ( OpensearchResponseWriter ) responseWriter ) . setTitle ( promoteSearchPageGreeting ) ;
}
2012-09-14 12:09:20 +02:00
// if this is a call to YaCys special search formats, enhance the query with field assignments
2013-06-26 09:27:22 +02:00
if ( ( responseWriter instanceof YJsonResponseWriter | | responseWriter instanceof OpensearchResponseWriter ) & & " true " . equals ( post . get ( " hl " , " true " ) ) ) {
2012-08-22 17:37:34 +02:00
// add options for snippet generation
2013-03-20 16:19:49 +01:00
if ( ! post . containsKey ( " hl.q " ) ) post . put ( " hl.q " , q ) ;
if ( ! post . containsKey ( " hl.fl " ) ) post . put ( " hl.fl " , CollectionSchema . h1_txt . getSolrFieldName ( ) + " , " + CollectionSchema . h2_txt . getSolrFieldName ( ) + " , " + CollectionSchema . text_t . getSolrFieldName ( ) ) ;
if ( ! post . containsKey ( " hl.alternateField " ) ) post . put ( " hl.alternateField " , CollectionSchema . description . getSolrFieldName ( ) ) ;
if ( ! post . containsKey ( " hl.simple.pre " ) ) post . put ( " hl.simple.pre " , " <b> " ) ;
if ( ! post . containsKey ( " hl.simple.post " ) ) post . put ( " hl.simple.post " , " </b> " ) ;
if ( ! post . containsKey ( " hl.fragsize " ) ) post . put ( " hl.fragsize " , Integer . toString ( SearchEvent . SNIPPET_MAX_LENGTH ) ) ;
2012-08-22 17:37:34 +02:00
}
2012-08-09 18:06:48 +02:00
2012-08-10 09:48:15 +02:00
// get the embedded connector
2013-02-22 15:45:15 +01:00
boolean defaultConnector = post = = null | | post . get ( " core " , CollectionSchema . CORE_NAME ) . equals ( CollectionSchema . CORE_NAME ) ;
EmbeddedSolrConnector connector = defaultConnector ? sb . index . fulltext ( ) . getDefaultEmbeddedConnector ( ) : sb . index . fulltext ( ) . getEmbeddedConnector ( WebgraphSchema . CORE_NAME ) ;
2012-08-10 09:48:15 +02:00
if ( connector = = null ) return null ;
2012-09-14 12:09:20 +02:00
// do the solr request, generate facets if we use a special YaCy format
2013-02-13 00:01:38 +01:00
SolrParams params = post . toSolrParams ( /*responseWriter instanceof JsonResponseWriter ? new YaCySchema[]{YaCySchema.host_s, YaCySchema.url_file_ext_s, YaCySchema.url_protocol_s} :*/ null ) ;
2012-09-14 12:09:20 +02:00
SolrQueryRequest req = connector . request ( params ) ;
2012-08-09 18:06:48 +02:00
SolrQueryResponse response = null ;
Exception e = null ;
2013-07-17 18:31:30 +02:00
try { response = connector . query ( req ) ; } catch ( final SolrException ee ) { e = ee ; }
2012-08-09 18:06:48 +02:00
if ( response ! = null ) e = response . getException ( ) ;
2012-07-19 11:34:05 +02:00
if ( e ! = null ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2013-06-28 15:19:50 +02:00
if ( req ! = null ) req . close ( ) ;
SolrRequestInfo . clearRequestInfo ( ) ;
2012-07-19 11:34:05 +02:00
return null ;
}
// write the result directly to the output stream
Writer ow = new FastWriter ( new OutputStreamWriter ( out , UTF8 . charset ) ) ;
try {
responseWriter . write ( ow , req , response ) ;
ow . flush ( ) ;
2013-07-17 18:31:30 +02:00
} catch ( final IOException e1 ) {
2012-07-19 11:34:05 +02:00
} finally {
req . close ( ) ;
2013-06-28 15:19:50 +02:00
SolrRequestInfo . clearRequestInfo ( ) ;
2013-07-17 18:31:30 +02:00
try { ow . close ( ) ; } catch ( final IOException e1 ) { }
2012-07-19 11:34:05 +02:00
}
2012-08-22 23:50:40 +02:00
// log result
Object rv = response . getValues ( ) . get ( " response " ) ;
2013-02-25 14:31:50 +01:00
int matches = ( ( ResultContext ) rv ) . docs . matches ( ) ;
2012-11-07 14:15:27 +01:00
if ( rv ! = null & & rv instanceof ResultContext ) {
2013-02-25 14:31:50 +01:00
AccessTracker . addToDump ( q , Integer . toString ( matches ) ) ;
2012-08-22 23:50:40 +02:00
}
2013-02-25 14:31:50 +01:00
2013-07-09 14:28:25 +02:00
ConcurrentLog . info ( " SOLR Query " , " results: " + matches + " , for query: " + post . toString ( ) ) ;
2012-07-19 11:34:05 +02:00
return null ;
}
}