enhanced ranking; patches to replace old ranking

This commit is contained in:
Michael Peter Christen 2013-10-09 15:10:03 +02:00
parent 1b61bd40ed
commit 90c8577840
3 changed files with 24 additions and 15 deletions

View File

@ -962,20 +962,20 @@ search.ranking.rwi.profile =
# The field boostfunctionmode can be either 'add' or 'multiply' to describe the mode.
# Every boost method with a number > 0 must have a name, so that it can be selected in a query using the syntax /name
search.ranking.solr.collection.boostname.tmpa.0=Default Profile
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0
search.ranking.solr.collection.boostquery.tmpa.0=fuzzy_signature_unique_b:true^100000.0
search.ranking.solr.collection.boostfunction.tmpb.0=scale(cr_host_norm_i,1,20)
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0
search.ranking.solr.collection.boostquery.tmpa.0=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.0=
search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
search.ranking.solr.collection.boostfields.tmpa.1=text_t^1.0
search.ranking.solr.collection.boostquery.tmpa.1=fuzzy_signature_unique_b:true^100000.0
search.ranking.solr.collection.boostquery.tmpa.1=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.1=recip(rord(last_modified),1,1000,1000)
search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e. if a site:-operator is used
search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0
search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^100000.0
search.ranking.solr.collection.boostfunction.tmpb.2=scale(cr_host_norm_i,1,20)
search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0
search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0
search.ranking.solr.collection.boostfunction.tmpb.2=
search.ranking.solr.collection.boostname.tmpa.3=_unused3
search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
search.ranking.solr.collection.boostquery.tmpa.3=fuzzy_signature_unique_b:true^100000.0
search.ranking.solr.collection.boostquery.tmpa.3=clickdepth_i:0^0.8 clickdepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.3=
# the following values are used to identify duplicate content

View File

@ -68,7 +68,7 @@ public class RankingSolr_p {
}
}
// When the request carries a "ResetBoosts" key, replace the boost string of the
// selected ranking profile (profileNr) with a hard-coded default.
if (post != null && post.containsKey("ResetBoosts")) {
// NOTE(review): two declarations of `s` follow. This looks like the removed and the
// added line of a diff shown together; presumably only the second string (the one
// with the smaller weights) is the current default - confirm against the real file.
String s = "url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0";
String s = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0";
// store the string under the per-profile configuration key ...
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + profileNr, s);
// ... and hand the same string to the ranking object of that profile
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).updateBoosts(s);
}
@ -81,7 +81,7 @@ public class RankingSolr_p {
}
}
if (post != null && post.containsKey("ResetBQ")) {
String bq = "fuzzy_signature_unique_b:true^100000.0";
String bq = "clickdepth_i:0^0.8 clickdepth_i:1^0.4";
if (bq != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr, bq);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostQuery(bq);
@ -96,7 +96,7 @@ public class RankingSolr_p {
}
}
if (post != null && post.containsKey("ResetBF")) {
String bf = "product(recip(rord(last_modified),1,1000,1000),div(product(log(product(references_external_i,references_exthosts_i)),div(references_internal_i,host_extent_i)),add(clickdepth_i,1)))";
String bf = "";
if (bf != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + profileNr, bf);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostFunction(bf);

View File

@ -469,10 +469,19 @@ public final class Switchboard extends serverSwitch {
// Configure the ranking profiles 0..3 from the configuration: name, boost string,
// boost query and boost function are read per profile index and passed to the Ranking object.
for (int i = 0; i <= 3; i++) {
// must be done every time the boosts change
Ranking r = solrCollectionConfigurationWork.getRanking(i);
// NOTE(review): the next four calls pass the configured values through unmodified and
// are repeated at the end of the loop body with the patched values. They look like the
// pre-change lines of a diff rendered alongside the new ones - confirm before relying on them.
r.setName(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ + i, "_dummy" + i));
r.updateBoosts(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + i, "text_t^1.0"));
r.setBoostQuery(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + i, ""));
r.setBoostFunction(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + i, ""));
// read the four settings for profile i; the second argument of getConfig is the fallback value
String name = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTNAME_ + i, "_dummy" + i);
String boosts = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFIELDS_ + i, "text_t^1.0");
String bq = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + i, "");
String bf = this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTFUNCTION_ + i, "");
// apply some hard-coded patches for earlier experiments we do not want any more:
// a value is only replaced when it is exactly equal to one of the former strings.
// Either of the two former boost functions is replaced by an empty string (any profile).
if (bf.equals("product(recip(rord(last_modified),1,1000,1000),div(product(log(product(references_external_i,references_exthosts_i)),div(references_internal_i,host_extent_i)),add(clickdepth_i,1)))") ||
bf.equals("scale(cr_host_norm_i,1,20)")) bf = "";
// The former boost query is replaced by the clickdepth-based one for profile 0 only.
if (i == 0 && bq.equals("fuzzy_signature_unique_b:true^100000.0")) bq = "clickdepth_i:0^0.8 clickdepth_i:1^0.4";
// The former boost string is replaced for every profile index.
// NOTE(review): the replacement string ends with h2_txt^2.0, while the new default
// boost string for profile 0 uses h2_txt^3.0 - confirm which weight is intended.
if (boosts.equals("url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0")) boosts = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^2.0";
// pass the (possibly patched) values to the ranking profile
r.setName(name);
r.updateBoosts(boosts);
r.setBoostQuery(bq);
r.setBoostFunction(bf);
}
// initialize index