yacy_search_server/htroot/ContentAnalysis_p.java
Michael Peter Christen addba047e2 changes in ranking computation
- an existing ranking servlet for solr was extended. It is now possible
to set boost values for fields, boost functions and boost queries.
- The ranking can have different instances, but currently only the first
one is used
- added an abstraction layer for fields which can be used for search and
those fields can be edited in the solr ranking configruation
- the ranking value from solr within the field score is used to combine
remote search requests, which all are created using the same locally
defined boost values
- reduced the number of fields which are used for search (makes it
faster)
- replaced some text fields by string fields (makes indexing faster)
- removed classes which had no use
- made a large number of experiments for a better ranking and created a
temporary setting which prefers hits inside titles
- adjusted also the RWI-based ranking computation to 'prefer title'
- made special cases like for portal search where no post-processing and
post-ranking is wanted: this keeps the original ranking order as done by
Solr
- fixed many bugs with old settings for ranking
2013-03-13 14:47:00 +01:00

69 lines
2.9 KiB
Java

/**
* ContentAnalysis_p
* Copyright 2013 by Michael Peter Christen
* First released 12.03.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import net.yacy.cora.federate.solr.Ranking;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public class ContentAnalysis_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.fulltext().clearCache(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));
Ranking.setQuantRate(post.getFloat("quantRate", 0.5f));
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, Ranking.getMinTokenLen());
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, Ranking.getQuantRate());
}
if (post != null && post.containsKey("ResetDoublecheck")) {
Ranking.setMinTokenLen(3);
Ranking.setQuantRate(0.5f);
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, Ranking.getMinTokenLen());
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, Ranking.getQuantRate());
}
if (post != null && post.containsKey("ResetRanking")) {
Ranking.setMinTokenLen(3);
Ranking.setQuantRate(0.5f);
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, Ranking.getMinTokenLen());
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, Ranking.getQuantRate());
}
final serverObjects prop = new serverObjects();
prop.put("minTokenLen", Ranking.getMinTokenLen());
prop.put("quantRate", Ranking.getQuantRate());
return prop;
}
}