Added a new servlet to configure the solr ranking using field boosts

This commit is contained in:
Michael Peter Christen 2012-12-03 17:01:19 +01:00
parent a598fb6227
commit 908ad2f174
5 changed files with 137 additions and 4 deletions

View File

@ -1,7 +1,7 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Ranking Configuration</title>
<title>YaCy '#[clientname]#': RWI Ranking Configuration</title>
#%env/templates/metas.template%#
<script>
$(function() {
@ -27,7 +27,7 @@
<body id="RankingRWI_p">
#%env/templates/header.template%#
#%env/templates/submenuSearchConfiguration.template%#
<h2>Ranking Configuration</h2>
<h2>RWI Ranking Configuration</h2>
<p>The document ranking influences the order of the search result entities.
A ranking is computed using a number of attributes from the documents that match with the search word.
The attributes are first normalized over all search results and then the normalized attribute is multiplied with the ranking coefficient computed from this list.

49
htroot/RankingSolr_p.html Normal file
View File

@ -0,0 +1,49 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Solr Ranking Configuration</title>
#%env/templates/metas.template%#
</head>
<body id="RankingSolr_p">
#%env/templates/header.template%#
#%env/templates/submenuSearchConfiguration.template%#
<h2>Solr Ranking Configuration</h2>
<p>These are ranking attributes for Solr. This ranking applies for internal and remote Solr access.</p>
<form class="dsearch" action="RankingSolr_p.html" method="post" enctype="multipart/form-data">
<fieldset>
<legend>Solr Double Content Detection</legend><p>Double-Content detection is done using a ranking on a 'unique'-Field, named 'fuzzy_signature_unique_b'.
This field is set during parsing and is influenced by two attributes for the <a href="http://lucene.apache.org/solr/api-4_0_0-BETA/org/apache/solr/update/processor/TextProfileSignature.html">TextProfileSignature</a> class.</p>
<dl>
<dt style="width:260px"><label for="minTokenLen">minTokenLen</label></dt>
<dd style="width:360px; float:left; display:inline;" id="minTokenLen_dd">
<input name="minTokenLen" id="minTokenLen" type="text" align="right" size="10" value="#[minTokenLen]#"><br/>
This is the minimum length of a word which shall be considered as element of the signature. Should be either 2 or 3.
</dd>
<dt style="width:260px"><label for="quantRate">quantRate</label></dt>
<dd style="width:360px; float:left; display:inline;" id="quantRate_dd">
<input name="quantRate" id="quantRate" type="text" align="right" size="10" value="#[quantRate]#"><br/>
The quantRate is a measurement for the number of words that take part in a signature computation. The higher the number, the fewer
words are used for the signature.
For minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLen = 3 the quantRate value must not be below 0.5.
</dd>
</dl>
<input type="submit" name="EnterDoublecheck" value="Set" />
<input type="submit" name="ResetDoublecheck" value="Re-Set to default" />
</fieldset>
</form>
<form class="dsearch" action="RankingSolr_p.html" method="post" enctype="multipart/form-data">
<fieldset>
<legend>Solr Boosts</legend>
<dl>#{boosts}#
<dt style="width:260px"><label for="boost_#[field]#">#[field]#</label></dt>
<dd style="width:360px; float:left; display:inline;" id="boost_dd_#[field]#">
<input name="boost_#[field]#" id="boost_#[field]#" type="text" align="right" size="10" value="#[boost]#">
</dd>#{/boosts}#
</dl>
<input type="submit" name="EnterRanking" value="Set" />
<input type="submit" name="ResetRanking" value="Re-Set to default" />
</fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

80
htroot/RankingSolr_p.java Normal file
View File

@ -0,0 +1,80 @@
import java.util.Map;
import net.yacy.cora.federate.solr.Boost;
import net.yacy.cora.federate.solr.YaCySchema;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SearchEventCache;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet behind RankingSolr_p.html: lets the operator edit the double-content
 * detection parameters and the per-field boosts held in {@link Boost#RANKING},
 * and writes the edited values into the Switchboard configuration.
 */
public class RankingSolr_p {

    /** Prefix of the form fields that carry a boost value; the remainder of the key names the field. */
    private static final String BOOST_KEY_PREFIX = "boost_";

    /**
     * Applies the submitted form action (if any) and returns the template properties.
     *
     * @param header the request header (not used)
     * @param post   the submitted form fields, or {@code null} when the page is only displayed
     * @param env    the server environment; cast to {@link Switchboard}
     * @return properties "minTokenLen", "quantRate" and the "boosts" list with one
     *         "field"/"boost" pair per entry of {@link Boost#RANKING}
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final Switchboard sb = (Switchboard) env;

        // clean up all search events
        SearchEventCache.cleanupEvents(true);

        if (post != null && post.containsKey("EnterDoublecheck")) {
            Boost.RANKING.setMinTokenLen(post.getInt("minTokenLen", 3));
            Boost.RANKING.setQuantRate(post.getFloat("quantRate", 0.5f));
            storeDoubleDetection(sb);
        }

        if (post != null && post.containsKey("ResetDoublecheck")) {
            Boost.RANKING.setMinTokenLen(3);
            Boost.RANKING.setQuantRate(0.5f);
            storeDoubleDetection(sb);
        }

        if (post != null && post.containsKey("EnterRanking")) {
            final String s = collectSubmittedBoosts(post);
            // an empty string means no usable boost was submitted; keep the current settings then
            if (s.length() > 0) {
                sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, s);
                Boost.RANKING.update(s);
            }
        }

        if (post != null && post.containsKey("ResetRanking")) {
            Boost.RANKING.initDefaults();
            // write the reset table to the configuration as well, like the other actions do;
            // otherwise the configuration keeps the previously stored boost string
            sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, serializeCurrentBoosts());
        }

        final serverObjects prop = new serverObjects();
        prop.put("minTokenLen", Boost.RANKING.getMinTokenLen());
        prop.put("quantRate", Boost.RANKING.getQuantRate());
        int i = 0;
        for (final Map.Entry<YaCySchema, Float> entry : Boost.RANKING.entrySet()) {
            final YaCySchema field = entry.getKey();
            final float boost = entry.getValue();
            prop.put("boosts_" + i + "_field", field.getSolrFieldName());
            prop.put("boosts_" + i + "_boost", Float.toString(boost));
            i++;
        }
        prop.put("boosts", i);

        return prop;
    }

    /** Writes the current double-content detection parameters of Boost.RANKING into the configuration. */
    private static void storeDoubleDetection(final Switchboard sb) {
        sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, Boost.RANKING.getMinTokenLen());
        sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, Boost.RANKING.getQuantRate());
    }

    /**
     * Builds the comma-separated "field^boost" string from all submitted "boost_*" form fields.
     * Fields with an unknown name, a missing value or a non-numeric value are skipped.
     *
     * @return the boost string; empty when no valid boost was submitted
     */
    private static String collectSubmittedBoosts(final serverObjects post) {
        final StringBuilder boostString = new StringBuilder();
        for (final Map.Entry<String, String> entry : post.entrySet()) {
            final String key = entry.getKey();
            // require the full prefix including '_' so that stripping it can never run past the end of the key
            if (key == null || !key.startsWith(BOOST_KEY_PREFIX)) {
                continue;
            }
            final YaCySchema field = resolveField(key.substring(BOOST_KEY_PREFIX.length()));
            final String value = entry.getValue();
            if (field == null || value == null) {
                continue;
            }
            final float boost;
            try {
                boost = Float.parseFloat(value);
            } catch (final NumberFormatException e) {
                continue; // not a number: ignore this field
            }
            appendBoost(boostString, field, boost);
        }
        return boostString.toString();
    }

    /** Builds the comma-separated "field^boost" string from the current content of Boost.RANKING. */
    private static String serializeCurrentBoosts() {
        final StringBuilder boostString = new StringBuilder();
        for (final Map.Entry<YaCySchema, Float> entry : Boost.RANKING.entrySet()) {
            appendBoost(boostString, entry.getKey(), entry.getValue().floatValue());
        }
        return boostString.toString();
    }

    /** Appends one "field^boost" element, preceded by a comma if the builder is not empty. */
    private static void appendBoost(final StringBuilder boostString, final YaCySchema field, final float boost) {
        if (boostString.length() > 0) {
            boostString.append(',');
        }
        boostString.append(field.getSolrFieldName()).append('^').append(Float.toString(boost));
    }

    /**
     * Looks up a schema field by name.
     *
     * @return the field, or {@code null} if the name does not denote a known field
     */
    private static YaCySchema resolveField(final String fieldName) {
        try {
            return YaCySchema.valueOf(fieldName);
        } catch (final IllegalArgumentException e) {
            // an enum valueOf lookup signals an unknown name by throwing, not by returning null
            return null;
        }
    }
}

View File

@ -5,6 +5,7 @@
<li><a href="/ConfigAppearance_p.html" class="MenuItemLink lock">Appearance</a></li>
<li><a href="/ConfigLanguage_p.html" class="MenuItemLink lock">Language</a></li>
<li><a href="/ConfigProfile_p.html" class="MenuItemLink lock">User Profile</a></li>
<li><a href="/RankingRWI_p.html" class="MenuItemLink lock">Ranking Config</a></li>
<li><a href="/RankingSolr_p.html" class="MenuItemLink lock">Solr Ranking Config</a></li>
<li><a href="/RankingRWI_p.html" class="MenuItemLink lock">RWI Ranking Config</a></li>
</ul>
</div>

View File

@ -42,6 +42,10 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
/**
 * Creates the boost table and fills it by calling {@link #initDefaults()}.
 */
private Boost() {
    initDefaults();
}
public void initDefaults() {
put(YaCySchema.sku, 20.0f);
put(YaCySchema.url_paths_sxt, 20.0f);
put(YaCySchema.title, 15.0f);
@ -53,7 +57,6 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
put(YaCySchema.text_t, 1.0f);
put(YaCySchema.fuzzy_signature_unique_b, 100000.0f); // must be very high to move double results to end of list
}
/**
* override the get method to return 1.0f for each non-resolvable object