2012-08-14 12:40:26 +02:00
/**
 *  search
 *  Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
 *  First released 14.08.2012 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */
import java.io.IOException ;
import java.io.OutputStream ;
import java.io.OutputStreamWriter ;
import java.io.Writer ;
2012-09-06 22:35:55 +02:00
import java.util.ArrayList ;
2012-08-21 01:57:46 +02:00
import java.util.Map ;
2012-08-14 12:40:26 +02:00
import net.yacy.cora.document.UTF8 ;
2012-12-02 16:54:29 +01:00
import net.yacy.cora.federate.solr.Boost ;
2012-09-25 21:20:03 +02:00
import net.yacy.cora.federate.solr.YaCySchema ;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector ;
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter ;
2012-08-16 16:28:57 +02:00
import net.yacy.cora.protocol.HeaderFramework ;
2012-08-14 12:40:26 +02:00
import net.yacy.cora.protocol.RequestHeader ;
2012-11-26 13:40:53 +01:00
import net.yacy.cora.util.CommonPattern ;
2012-08-14 12:40:26 +02:00
import net.yacy.kelondro.logging.Log ;
import net.yacy.search.Switchboard ;
2012-12-02 16:54:29 +01:00
import net.yacy.search.SwitchboardConstants ;
2012-08-22 23:50:40 +02:00
import net.yacy.search.query.AccessTracker ;
2012-11-18 01:22:41 +01:00
import net.yacy.search.query.QueryGoal ;
2012-11-01 17:40:06 +01:00
import net.yacy.search.query.SearchEvent ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
2012-08-14 12:40:26 +02:00
import org.apache.solr.common.SolrException ;
import org.apache.solr.common.params.CommonParams ;
import org.apache.solr.request.SolrQueryRequest ;
2012-11-07 14:15:27 +01:00
import org.apache.solr.response.ResultContext ;
2012-08-14 12:40:26 +02:00
import org.apache.solr.response.SolrQueryResponse ;
2012-11-02 12:29:48 +01:00
import org.apache.solr.util.FastWriter ;
2012-08-14 12:40:26 +02:00
// try
2012-08-22 17:37:34 +02:00
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
2012-08-14 12:40:26 +02:00
/ * *
* This is a gsa result formatter for solr search results .
* The result format is implemented according to
* https : //developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
* /
public class searchresult {
private final static GSAResponseWriter responseWriter = new GSAResponseWriter ( ) ;
/ * *
* get the right mime type for this streamed result page
* @param header
* @param post
* @param env
* @return
* /
public static String mime ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
return " text/xml " ;
}
2012-08-21 01:57:46 +02:00
2012-08-14 12:40:26 +02:00
/ * *
* @param header
* @param post
* @param env
* @param out
* @return
* /
public static serverObjects respond ( final RequestHeader header , final serverObjects post , final serverSwitch env , final OutputStream out ) {
// this uses the methods in the jetty servlet environment and can be removed if jetty in implemented
Switchboard sb = ( Switchboard ) env ;
2012-08-16 16:28:57 +02:00
// remember the peer contact for peer statistics
final String clientip = header . get ( HeaderFramework . CONNECTION_PROP_CLIENTIP , " <unknown> " ) ; // read an artificial header addendum
final String userAgent = header . get ( HeaderFramework . USER_AGENT , " <unknown> " ) ;
sb . peers . peerActions . setUserAgent ( clientip , userAgent ) ;
2012-08-14 12:40:26 +02:00
// check if user is allowed to search (can be switched in /ConfigPortal.html)
2012-08-20 17:10:48 +02:00
boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
final boolean searchAllowed = authenticated | | sb . getConfigBool ( " publicSearchpage " , true ) ;
2012-08-14 12:40:26 +02:00
if ( ! searchAllowed ) return null ;
// check post
if ( post = = null ) return null ;
2012-09-10 15:20:55 +02:00
Log . logInfo ( " GSA Query " , post . toString ( ) ) ;
2012-08-20 17:10:48 +02:00
sb . intermissionAllThreads ( 3000 ) ; // tell all threads to do nothing for a specific time
2012-08-14 12:40:26 +02:00
2012-12-02 16:54:29 +01:00
// update the boost values
2012-12-05 12:26:42 +01:00
Boost . RANKING . updateBoosts ( sb . getConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_BOOST , " " ) ) ;
2012-12-02 16:54:29 +01:00
2012-08-14 12:40:26 +02:00
// rename post fields according to result style
//post.put(CommonParams.Q, post.remove("q")); // same as solr
//post.put(CommonParams.START, post.remove("start")); // same as solr
//post.put(, post.remove("client"));//required, example: myfrontend
//post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
2012-12-07 14:54:49 +01:00
String originalQuery = post . get ( CommonParams . Q , " " ) ;
post . put ( " originalQuery " , originalQuery ) ;
2012-11-09 16:25:24 +01:00
// get a solr query string
2012-12-15 00:05:46 +01:00
QueryGoal qg = new QueryGoal ( originalQuery , originalQuery ) ;
2012-12-07 14:54:49 +01:00
StringBuilder solrQ = qg . solrQueryString ( sb . index . fulltext ( ) . getSolrScheme ( ) ) ;
post . put ( " defType " , " edismax " ) ;
post . put ( CommonParams . Q , solrQ . toString ( ) ) ;
2012-08-14 12:40:26 +02:00
post . put ( CommonParams . ROWS , post . remove ( " num " ) ) ;
2012-08-29 16:28:32 +02:00
post . put ( CommonParams . ROWS , Math . min ( post . getInt ( CommonParams . ROWS , 10 ) , ( authenticated ) ? 5000 : 100 ) ) ;
2012-12-02 16:54:29 +01:00
float f = Boost . RANKING . get ( YaCySchema . fuzzy_signature_unique_b ) ;
post . put ( " bq " , YaCySchema . fuzzy_signature_unique_b . getSolrFieldName ( ) + " :true^ " + Float . toString ( f ) ) ; // a boost query that moves double content to the back
2012-11-22 13:03:33 +01:00
post . put ( CommonParams . FL ,
YaCySchema . content_type . getSolrFieldName ( ) + ',' +
YaCySchema . id . getSolrFieldName ( ) + ',' +
YaCySchema . sku . getSolrFieldName ( ) + ',' +
YaCySchema . title . getSolrFieldName ( ) + ',' +
YaCySchema . description . getSolrFieldName ( ) + ',' +
YaCySchema . load_date_dt . getSolrFieldName ( ) + ',' +
YaCySchema . last_modified . getSolrFieldName ( ) + ',' +
YaCySchema . size_i . getSolrFieldName ( ) ) ;
2012-08-22 17:37:34 +02:00
post . put ( " hl " , " true " ) ;
2012-12-07 14:54:49 +01:00
post . put ( " hl.q " , originalQuery ) ;
2012-10-18 15:26:55 +02:00
post . put ( " hl.fl " , YaCySchema . h1_txt . getSolrFieldName ( ) + " , " + YaCySchema . h2_txt . getSolrFieldName ( ) + " , " + YaCySchema . text_t . getSolrFieldName ( ) ) ;
post . put ( " hl.alternateField " , YaCySchema . description . getSolrFieldName ( ) ) ;
2012-08-22 17:37:34 +02:00
post . put ( " hl.simple.pre " , " <b> " ) ;
post . put ( " hl.simple.post " , " </b> " ) ;
2012-11-01 17:40:06 +01:00
post . put ( " hl.fragsize " , Integer . toString ( SearchEvent . SNIPPET_MAX_LENGTH ) ) ;
2012-08-21 02:39:28 +02:00
GSAResponseWriter . Sort sort = new GSAResponseWriter . Sort ( post . get ( CommonParams . SORT , " " ) ) ;
2012-08-21 01:57:46 +02:00
String sorts = sort . toSolr ( ) ;
if ( sorts = = null ) {
post . remove ( CommonParams . SORT ) ;
} else {
post . put ( CommonParams . SORT , sorts ) ;
}
2012-12-07 14:54:49 +01:00
String site = post . remove ( " site " ) ; // example: col1|col2
2012-08-21 01:57:46 +02:00
String access = post . remove ( " access " ) ;
String entqr = post . remove ( " entqr " ) ;
2012-08-14 12:40:26 +02:00
2012-09-06 22:35:55 +02:00
// add sites operator
2012-09-03 15:26:08 +02:00
if ( site ! = null & & site . length ( ) > 0 ) {
2012-11-26 13:40:53 +01:00
String [ ] s0 = CommonPattern . VERTICALBAR . split ( site ) ;
2012-09-06 22:35:55 +02:00
ArrayList < String > sites = new ArrayList < String > ( 2 ) ;
for ( String s : s0 ) {
s = s . trim ( ) . toLowerCase ( ) ;
if ( s . length ( ) > 0 ) sites . add ( s ) ;
}
2012-12-07 14:54:49 +01:00
StringBuilder fq = new StringBuilder ( 20 ) ;
2012-09-06 22:35:55 +02:00
if ( sites . size ( ) > 1 ) {
2012-12-07 14:54:49 +01:00
fq . append ( YaCySchema . collection_sxt . getSolrFieldName ( ) ) . append ( ':' ) . append ( sites . get ( 0 ) ) ;
2012-09-06 22:35:55 +02:00
for ( int i = 1 ; i < sites . size ( ) ; i + + ) {
2012-12-07 14:54:49 +01:00
fq . append ( " OR " ) . append ( YaCySchema . collection_sxt . getSolrFieldName ( ) ) . append ( ':' ) . append ( sites . get ( i ) ) ;
2012-09-06 22:35:55 +02:00
}
} else if ( sites . size ( ) = = 1 ) {
2012-12-07 14:54:49 +01:00
fq . append ( YaCySchema . collection_sxt . getSolrFieldName ( ) ) . append ( ':' ) . append ( sites . get ( 0 ) ) ;
2012-09-06 22:35:55 +02:00
}
2012-12-07 14:54:49 +01:00
post . put ( CommonParams . FQ , fq . toString ( ) ) ;
2012-09-03 15:26:08 +02:00
}
2012-10-02 14:29:45 +02:00
2012-08-14 12:40:26 +02:00
// get the embedded connector
2012-08-17 15:52:33 +02:00
EmbeddedSolrConnector connector = ( EmbeddedSolrConnector ) sb . index . fulltext ( ) . getLocalSolr ( ) ;
2012-08-14 12:40:26 +02:00
if ( connector = = null ) return null ;
// do the solr request
2012-09-14 12:09:20 +02:00
SolrQueryRequest req = connector . request ( post . toSolrParams ( null ) ) ;
2012-08-14 12:40:26 +02:00
SolrQueryResponse response = null ;
Exception e = null ;
try { response = connector . query ( req ) ; } catch ( SolrException ee ) { e = ee ; }
if ( response ! = null ) e = response . getException ( ) ;
if ( e ! = null ) {
Log . logException ( e ) ;
return null ;
}
2012-08-21 01:57:46 +02:00
// set some context for the writer
Map < Object , Object > context = req . getContext ( ) ;
context . put ( " ip " , header . get ( " CLIENTIP " , " " ) ) ;
2012-08-31 14:00:53 +02:00
context . put ( " client " , " vsm_frontent " ) ;
2012-08-21 01:57:46 +02:00
context . put ( " sort " , sort . sort ) ;
context . put ( " site " , site = = null ? " " : site ) ;
context . put ( " access " , access = = null ? " p " : access ) ;
context . put ( " entqr " , entqr = = null ? " 3 " : entqr ) ;
2012-08-14 12:40:26 +02:00
// write the result directly to the output stream
Writer ow = new FastWriter ( new OutputStreamWriter ( out , UTF8 . charset ) ) ;
try {
responseWriter . write ( ow , req , response ) ;
ow . flush ( ) ;
} catch ( IOException e1 ) {
} finally {
req . close ( ) ;
try { ow . close ( ) ; } catch ( IOException e1 ) { }
}
2012-08-22 23:50:40 +02:00
// log result
Object rv = response . getValues ( ) . get ( " response " ) ;
2012-11-07 14:15:27 +01:00
if ( rv ! = null & & rv instanceof ResultContext ) {
2012-12-07 14:54:49 +01:00
AccessTracker . addToDump ( originalQuery , Integer . toString ( ( ( ResultContext ) rv ) . docs . matches ( ) ) ) ;
2012-08-22 23:50:40 +02:00
}
2012-08-14 12:40:26 +02:00
return null ;
}
}