2012-12-05 12:26:42 +01:00
/**
 *  RankingSolr_p
 *  Copyright 2012 by Michael Peter Christen
 *  First released 30.11.2012 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */
2012-12-03 17:01:19 +01:00
2014-11-20 18:46:06 +01:00
import java.io.IOException ;
import java.util.ArrayList ;
import java.util.Collection ;
import java.util.HashMap ;
import java.util.LinkedHashMap ;
2012-12-03 17:01:19 +01:00
import java.util.Map ;
2014-11-20 18:46:06 +01:00
import java.util.TreeMap ;
import java.util.Map.Entry ;
2012-12-03 17:01:19 +01:00
2013-03-13 14:47:00 +01:00
import net.yacy.cora.federate.solr.Ranking ;
import net.yacy.cora.federate.solr.SchemaDeclaration ;
2012-12-03 17:01:19 +01:00
import net.yacy.cora.protocol.RequestHeader ;
2014-11-20 18:46:06 +01:00
import net.yacy.cora.sorting.ReversibleScoreMap ;
import net.yacy.cora.util.ConcurrentLog ;
2012-12-03 17:01:19 +01:00
import net.yacy.search.Switchboard ;
import net.yacy.search.SwitchboardConstants ;
import net.yacy.search.query.SearchEventCache ;
2014-10-26 23:33:21 +01:00
import net.yacy.search.schema.CollectionConfiguration ;
2013-02-21 13:23:55 +01:00
import net.yacy.search.schema.CollectionSchema ;
2012-12-03 17:01:19 +01:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
2014-11-20 18:46:06 +01:00
2014-07-20 12:28:59 +02:00
import org.apache.solr.common.params.CommonParams ;
import org.apache.solr.common.params.DisMaxParams ;
2012-12-03 17:01:19 +01:00
public class RankingSolr_p {
public static serverObjects respond ( @SuppressWarnings ( " unused " ) final RequestHeader header , final serverObjects post , final serverSwitch env ) {
final Switchboard sb = ( Switchboard ) env ;
// clean up all search events
SearchEventCache . cleanupEvents ( true ) ;
2013-11-07 21:30:17 +01:00
sb . index . clearCaches ( ) ; // every time the ranking is changed we need to remove old orderings
2013-04-16 11:38:51 +02:00
int profileNr = 0 ;
if ( post ! = null ) profileNr = post . getInt ( " profileNr " , profileNr ) ;
2012-12-03 17:01:19 +01:00
2013-03-13 14:47:00 +01:00
if ( post ! = null & & post . containsKey ( " EnterBoosts " ) ) {
2012-12-03 17:01:19 +01:00
StringBuilder boostString = new StringBuilder ( ) ; // SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST;
for ( Map . Entry < String , String > entry : post . entrySet ( ) ) {
if ( entry . getKey ( ) . startsWith ( " boost " ) ) {
String fieldName = entry . getKey ( ) . substring ( 6 ) ;
2013-02-21 13:23:55 +01:00
CollectionSchema field = CollectionSchema . valueOf ( fieldName ) ;
2012-12-03 17:01:19 +01:00
if ( field = = null ) continue ;
2013-03-13 14:47:00 +01:00
String fieldValue = entry . getValue ( ) ;
if ( fieldValue = = null | | fieldValue . length ( ) = = 0 ) continue ;
2012-12-03 17:01:19 +01:00
try {
2013-03-13 14:47:00 +01:00
float boost = Float . parseFloat ( fieldValue ) ;
2014-07-20 12:28:59 +02:00
if ( boost > 0 . 0f ) { // don't allow <= 0
if ( boostString . length ( ) > 0 ) boostString . append ( ',' ) ;
boostString . append ( field . getSolrFieldName ( ) ) . append ( '^' ) . append ( Float . toString ( boost ) ) ;
}
2013-07-17 18:31:30 +02:00
} catch ( final NumberFormatException e ) {
2012-12-03 17:01:19 +01:00
continue ;
}
}
}
if ( boostString . length ( ) > 0 ) {
String s = boostString . toString ( ) ;
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + profileNr , s ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . updateBoosts ( s ) ;
2012-12-03 17:01:19 +01:00
}
2013-03-13 14:47:00 +01:00
}
if ( post ! = null & & post . containsKey ( " ResetBoosts " ) ) {
2014-07-21 00:47:14 +02:00
String s = " url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,author^1.0 " ;
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + profileNr , s ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . updateBoosts ( s ) ;
2012-12-03 17:01:19 +01:00
}
2013-03-13 14:47:00 +01:00
if ( post ! = null & & post . containsKey ( " EnterBQ " ) ) {
2014-07-20 12:28:59 +02:00
String bq = post . get ( DisMaxParams . BQ ) ;
2013-03-13 14:47:00 +01:00
if ( bq ! = null ) {
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr , bq ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setBoostQuery ( bq ) ;
2013-03-13 14:47:00 +01:00
}
}
if ( post ! = null & & post . containsKey ( " ResetBQ " ) ) {
2014-04-16 22:16:20 +02:00
String bq = " crawldepth_i:0^0.8 crawldepth_i:1^0.4 " ;
2013-03-13 14:47:00 +01:00
if ( bq ! = null ) {
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr , bq ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setBoostQuery ( bq ) ;
2013-03-13 14:47:00 +01:00
}
}
2014-06-15 12:38:30 +02:00
if ( post ! = null & & post . containsKey ( " EnterFQ " ) ) {
2014-07-20 12:28:59 +02:00
String fq = post . get ( CommonParams . FQ ) ;
2014-06-15 12:38:30 +02:00
if ( fq ! = null ) {
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + profileNr , fq ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setFilterQuery ( fq ) ;
}
}
if ( post ! = null & & post . containsKey ( " ResetFQ " ) ) {
String fq = " " ; // i.e. "http_unique_b:true AND www_unique_b:true"
if ( fq ! = null ) {
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_FILTERQUERY_ + profileNr , fq ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setFilterQuery ( fq ) ;
}
}
2013-03-13 14:47:00 +01:00
if ( post ! = null & & post . containsKey ( " EnterBF " ) ) {
2014-07-20 12:28:59 +02:00
String bf = post . get ( DisMaxParams . BF ) ;
2013-03-13 14:47:00 +01:00
if ( bf ! = null ) {
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + profileNr , bf ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setBoostFunction ( bf ) ;
2013-03-13 14:47:00 +01:00
}
}
if ( post ! = null & & post . containsKey ( " ResetBF " ) ) {
2013-10-09 15:10:03 +02:00
String bf = " " ;
2013-03-13 14:47:00 +01:00
if ( bf ! = null ) {
2013-04-16 11:38:51 +02:00
sb . setConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + profileNr , bf ) ;
sb . index . fulltext ( ) . getDefaultConfiguration ( ) . getRanking ( profileNr ) . setBoostFunction ( bf ) ;
2013-03-13 14:47:00 +01:00
}
2012-12-03 17:01:19 +01:00
}
final serverObjects prop = new serverObjects ( ) ;
int i = 0 ;
2014-10-26 23:33:21 +01:00
CollectionConfiguration colcfg = sb . index . fulltext ( ) . getDefaultConfiguration ( ) ;
Ranking ranking = colcfg . getRanking ( profileNr ) ;
2013-03-13 14:47:00 +01:00
for ( SchemaDeclaration field : CollectionSchema . values ( ) ) {
if ( ! field . isSearchable ( ) ) continue ;
Float boost = ranking . getFieldBoost ( field ) ;
2014-10-26 23:33:21 +01:00
if ( boost ! = null | | colcfg . contains ( field ) ) { // show only available or configured boost fields
prop . put ( " boosts_ " + i + " _field " , field . getSolrFieldName ( ) ) ;
if ( boost = = null | | boost . floatValue ( ) < = 0 . 0f ) {
prop . put ( " boosts_ " + i + " _checked " , 0 ) ;
prop . put ( " boosts_ " + i + " _boost " , " " ) ;
prop . put ( " boosts_ " + i + " _notinindexwarning " , " 0 " ) ;
} else {
prop . put ( " boosts_ " + i + " _checked " , 1 ) ;
prop . put ( " boosts_ " + i + " _boost " , boost . toString ( ) ) ;
prop . put ( " boosts_ " + i + " _notinindexwarning " , ( colcfg . contains ( field . name ( ) ) ? " 0 " : " 1 " ) ) ;
}
prop . putHTML ( " boosts_ " + i + " _comment " , field . getComment ( ) ) ;
i + + ;
2013-03-13 14:47:00 +01:00
}
2012-12-03 17:01:19 +01:00
}
prop . put ( " boosts " , i ) ;
2014-07-20 12:28:59 +02:00
prop . put ( CommonParams . FQ , ranking . getFilterQuery ( ) ) ;
prop . put ( DisMaxParams . BQ , ranking . getBoostQuery ( ) ) ;
prop . put ( DisMaxParams . BF , ranking . getBoostFunction ( ) ) ;
2012-12-03 17:01:19 +01:00
2013-04-16 11:38:51 +02:00
for ( int j = 0 ; j < 4 ; j + + ) {
prop . put ( " profiles_ " + j + " _nr " , j ) ;
prop . put ( " profiles_ " + j + " _name " , sb . getConfig ( SwitchboardConstants . SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ + j , " N.N. " ) ) ;
prop . put ( " profiles_ " + j + " _selected " , profileNr = = j ? 1 : 0 ) ;
}
prop . put ( " profiles " , 4 ) ;
prop . put ( " profileNr " , profileNr ) ;
2014-11-20 18:46:06 +01:00
// make boost hints for vocabularies
Map < String , ReversibleScoreMap < String > > vocabularyFacet ;
try {
vocabularyFacet = sb . index . fulltext ( ) . getDefaultConnector ( ) . getFacets ( CollectionSchema . vocabularies_sxt . getSolrFieldName ( ) + " :[* TO *] " , 100 , CollectionSchema . vocabularies_sxt . getSolrFieldName ( ) ) ;
} catch ( IOException e ) {
ConcurrentLog . logException ( e ) ;
vocabularyFacet = new HashMap < > ( ) ;
}
if ( vocabularyFacet . size ( ) = = 0 ) {
prop . put ( " boosthint " , 0 ) ;
} else {
prop . put ( " boosthint " , 1 ) ;
prop . putHTML ( " boosthint_vocabulariesfield " , CollectionSchema . vocabularies_sxt . getSolrFieldName ( ) ) ;
ReversibleScoreMap < String > vokcounts = vocabularyFacet . values ( ) . iterator ( ) . next ( ) ;
Collection < String > vocnames = vokcounts . keyList ( true ) ;
prop . putHTML ( " boosthint_vocabulariesavailable " , vocnames . toString ( ) ) ;
ArrayList < String > voccountFields = new ArrayList < > ( ) ;
ArrayList < String > voclogcountFields = new ArrayList < > ( ) ;
ArrayList < String > voclogcountsFields = new ArrayList < > ( ) ;
ArrayList < String > ff = new ArrayList < > ( ) ;
for ( String vocname : vocnames ) {
voccountFields . add ( CollectionSchema . VOCABULARY_PREFIX + vocname + CollectionSchema . VOCABULARY_COUNT_SUFFIX ) ;
voclogcountFields . add ( CollectionSchema . VOCABULARY_PREFIX + vocname + CollectionSchema . VOCABULARY_LOGCOUNT_SUFFIX ) ;
voclogcountsFields . add ( CollectionSchema . VOCABULARY_PREFIX + vocname + CollectionSchema . VOCABULARY_LOGCOUNTS_SUFFIX ) ;
}
ff . addAll ( voclogcountFields ) ;
ff . addAll ( voclogcountsFields ) ;
prop . putHTML ( " boosthint_vocabulariesvoccount " , voccountFields . toString ( ) ) ;
prop . putHTML ( " boosthint_vocabulariesvoclogcount " , voclogcountFields . toString ( ) ) ;
prop . putHTML ( " boosthint_vocabulariesvoclogcounts " , voclogcountsFields . toString ( ) ) ;
String [ ] facetfields = ff . toArray ( new String [ ff . size ( ) ] ) ;
int fc = 0 ;
try {
LinkedHashMap < String , ReversibleScoreMap < String > > facets = sb . index . fulltext ( ) . getDefaultConnector ( ) . getFacets ( " *:* " , 100 , facetfields ) ;
facets . put ( CollectionSchema . vocabularies_sxt . getSolrFieldName ( ) , vokcounts ) ;
for ( Map . Entry < String , ReversibleScoreMap < String > > facetentry : facets . entrySet ( ) ) {
ReversibleScoreMap < String > facetfieldmap = facetentry . getValue ( ) ;
if ( facetfieldmap . size ( ) = = 0 ) continue ;
TreeMap < String , Integer > statMap = new TreeMap < > ( ) ;
for ( String k : facetfieldmap ) statMap . put ( k , facetfieldmap . get ( k ) ) ;
prop . put ( " boosthint_facets_ " + fc + " _facetname " , facetentry . getKey ( ) ) ;
int c = 0 ; for ( Entry < String , Integer > entry : statMap . entrySet ( ) ) {
prop . put ( " boosthint_facets_ " + fc + " _facet_ " + c + " _key " , entry . getKey ( ) ) ;
prop . put ( " boosthint_facets_ " + fc + " _facet_ " + c + " _count " , entry . getValue ( ) ) ;
c + + ;
}
prop . put ( " boosthint_facets_ " + fc + " _facet " , c ) ;
fc + + ;
}
} catch ( IOException e ) {
}
prop . put ( " boosthint_facets " , fc ) ;
}
2012-12-03 17:01:19 +01:00
return prop ;
}
}