Refactoring of search classes: moved plasmaSearchRankingProfile from package de.anomic.plasma to de.anomic.search and renamed it to RankingProfile

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6086 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-06-16 21:45:40 +00:00
parent 1457bfce16
commit be1c7ddc64
10 changed files with 69 additions and 68 deletions

View File

@ -31,9 +31,9 @@ import java.util.Map.Entry;
import de.anomic.http.httpRequestHeader;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -44,38 +44,38 @@ public class Ranking_p {
private static final HashMap<String, String> rankingParameters = new HashMap<String, String>();
static {
rankingParameters.put(plasmaSearchRankingProfile.APP_DC_CREATOR, "Appearance In Author");
rankingParameters.put(plasmaSearchRankingProfile.APP_DC_TITLE, "Appearance In Title");
rankingParameters.put(plasmaSearchRankingProfile.APPEMPH, "Appearance In Emphasized Text");
rankingParameters.put(plasmaSearchRankingProfile.APP_DC_DESCRIPTION, "Appearance In Reference/Anchor Name");
rankingParameters.put(plasmaSearchRankingProfile.APP_DC_SUBJECT, "Appearance In Tags");
rankingParameters.put(plasmaSearchRankingProfile.APPURL, "Appearance In URL");
rankingParameters.put(plasmaSearchRankingProfile.AUTHORITY, "Authority of Domain");
rankingParameters.put(plasmaSearchRankingProfile.CATHASAPP, "Category App, Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATHASAUDIO, "Category Audio Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATHASIMAGE, "Category Image Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATHASVIDEO, "Category Video Appearance");
rankingParameters.put(plasmaSearchRankingProfile.CATINDEXOF, "Category Index Page");
rankingParameters.put(plasmaSearchRankingProfile.DATE, "Date");
rankingParameters.put(plasmaSearchRankingProfile.DESCRCOMPINTOPLIST, "Description Comp. Appears In Toplist");
rankingParameters.put(plasmaSearchRankingProfile.DOMLENGTH, "Domain Length");
rankingParameters.put(plasmaSearchRankingProfile.HITCOUNT, "Hit Count");
rankingParameters.put(plasmaSearchRankingProfile.LLOCAL, "Links To Local Domain");
rankingParameters.put(plasmaSearchRankingProfile.LOTHER, "Links To Other Domain");
rankingParameters.put(plasmaSearchRankingProfile.PHRASESINTEXT, "Phrases In Text");
rankingParameters.put(plasmaSearchRankingProfile.POSINTEXT, "Position In Text");
rankingParameters.put(plasmaSearchRankingProfile.POSOFPHRASE, "Position Of Phrase");
rankingParameters.put(plasmaSearchRankingProfile.POSINPHRASE, "Position In Phrase");
rankingParameters.put(plasmaSearchRankingProfile.PREFER, "Application Of Prefer Pattern");
rankingParameters.put(plasmaSearchRankingProfile.TERMFREQUENCY, "Term Frequency");
rankingParameters.put(plasmaSearchRankingProfile.URLCOMPINTOPLIST, "URL Component Appears In Toplist");
rankingParameters.put(plasmaSearchRankingProfile.URLCOMPS, "URL Components");
rankingParameters.put(plasmaSearchRankingProfile.URLLENGTH, "URL Length");
rankingParameters.put(plasmaSearchRankingProfile.WORDDISTANCE, "Word Distance");
rankingParameters.put(plasmaSearchRankingProfile.WORDSINTEXT, "Words In Text");
rankingParameters.put(plasmaSearchRankingProfile.WORDSINTITLE, "Words In Title");
rankingParameters.put(plasmaSearchRankingProfile.YBR, "YaCy Block Rank");
rankingParameters.put(plasmaSearchRankingProfile.LANGUAGE, "Preferred Language");
rankingParameters.put(RankingProfile.APP_DC_CREATOR, "Appearance In Author");
rankingParameters.put(RankingProfile.APP_DC_TITLE, "Appearance In Title");
rankingParameters.put(RankingProfile.APPEMPH, "Appearance In Emphasized Text");
rankingParameters.put(RankingProfile.APP_DC_DESCRIPTION, "Appearance In Reference/Anchor Name");
rankingParameters.put(RankingProfile.APP_DC_SUBJECT, "Appearance In Tags");
rankingParameters.put(RankingProfile.APPURL, "Appearance In URL");
rankingParameters.put(RankingProfile.AUTHORITY, "Authority of Domain");
rankingParameters.put(RankingProfile.CATHASAPP, "Category App, Appearance");
rankingParameters.put(RankingProfile.CATHASAUDIO, "Category Audio Appearance");
rankingParameters.put(RankingProfile.CATHASIMAGE, "Category Image Appearance");
rankingParameters.put(RankingProfile.CATHASVIDEO, "Category Video Appearance");
rankingParameters.put(RankingProfile.CATINDEXOF, "Category Index Page");
rankingParameters.put(RankingProfile.DATE, "Date");
rankingParameters.put(RankingProfile.DESCRCOMPINTOPLIST, "Description Comp. Appears In Toplist");
rankingParameters.put(RankingProfile.DOMLENGTH, "Domain Length");
rankingParameters.put(RankingProfile.HITCOUNT, "Hit Count");
rankingParameters.put(RankingProfile.LLOCAL, "Links To Local Domain");
rankingParameters.put(RankingProfile.LOTHER, "Links To Other Domain");
rankingParameters.put(RankingProfile.PHRASESINTEXT, "Phrases In Text");
rankingParameters.put(RankingProfile.POSINTEXT, "Position In Text");
rankingParameters.put(RankingProfile.POSOFPHRASE, "Position Of Phrase");
rankingParameters.put(RankingProfile.POSINPHRASE, "Position In Phrase");
rankingParameters.put(RankingProfile.PREFER, "Application Of Prefer Pattern");
rankingParameters.put(RankingProfile.TERMFREQUENCY, "Term Frequency");
rankingParameters.put(RankingProfile.URLCOMPINTOPLIST, "URL Component Appears In Toplist");
rankingParameters.put(RankingProfile.URLCOMPS, "URL Components");
rankingParameters.put(RankingProfile.URLLENGTH, "URL Length");
rankingParameters.put(RankingProfile.WORDDISTANCE, "Word Distance");
rankingParameters.put(RankingProfile.WORDSINTEXT, "Words In Text");
rankingParameters.put(RankingProfile.WORDSINTITLE, "Words In Title");
rankingParameters.put(RankingProfile.YBR, "YaCy Block Rank");
rankingParameters.put(RankingProfile.LANGUAGE, "Preferred Language");
}
private static serverObjects defaultValues() {
@ -98,7 +98,7 @@ public class Ranking_p {
return prop;
}
private static void putRanking(final serverObjects prop, final plasmaSearchRankingProfile rankingProfile, final String prefix) {
private static void putRanking(final serverObjects prop, final RankingProfile rankingProfile, final String prefix) {
putRanking(prop, rankingProfile.preToExternalMap(prefix), prefix, "Pre");
putRanking(prop, rankingProfile.postToExternalMap(prefix), prefix, "Post");
}
@ -138,15 +138,15 @@ public class Ranking_p {
if ((post == null) || (sb == null)) {
// we create empty entries for template strings
final serverObjects prop = defaultValues();
final plasmaSearchRankingProfile ranking;
if(sb == null) ranking = new plasmaSearchRankingProfile(Query.CONTENTDOM_TEXT);
final RankingProfile ranking;
if(sb == null) ranking = new RankingProfile(Query.CONTENTDOM_TEXT);
else ranking = sb.getRanking();
putRanking(prop, ranking, "local");
return prop;
}
if (post.containsKey("EnterRanking")) {
final plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile("local", post.toString());
final RankingProfile ranking = new RankingProfile("local", post.toString());
sb.setConfig("rankingProfile", crypt.simpleEncode(ranking.toExternalString()));
final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local"));
@ -156,14 +156,14 @@ public class Ranking_p {
if (post.containsKey("ResetRanking")) {
sb.setConfig("rankingProfile", "");
final plasmaSearchRankingProfile ranking = new plasmaSearchRankingProfile(Query.CONTENTDOM_TEXT);
final RankingProfile ranking = new RankingProfile(Query.CONTENTDOM_TEXT);
final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local"));
putRanking(prop, ranking, "local");
return prop;
}
final plasmaSearchRankingProfile localRanking = new plasmaSearchRankingProfile("local", post.toString());
final RankingProfile localRanking = new RankingProfile("local", post.toString());
final serverObjects prop = new serverObjects();
putRanking(prop, localRanking, "local");
prop.putAll(localRanking.toExternalMap("local"));

View File

@ -44,11 +44,11 @@ import de.anomic.kelondro.util.SortStack;
import de.anomic.net.natLib;
import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSearchEvent.ResultEntry;
import de.anomic.plasma.plasmaSearchRankingProcess.NavigatorEntry;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
@ -173,7 +173,7 @@ public final class search {
final long timestamp = System.currentTimeMillis();
// prepare a search profile
final plasmaSearchRankingProfile rankingProfile = (profile.length() == 0) ? new plasmaSearchRankingProfile(Query.contentdomParser(contentdom)) : new plasmaSearchRankingProfile("", profile);
final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(Query.contentdomParser(contentdom)) : new RankingProfile("", profile);
// prepare an abstract result
final StringBuilder indexabstract = new StringBuilder();

View File

@ -41,13 +41,13 @@ import de.anomic.kelondro.util.SetTools;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
import de.anomic.plasma.parser.Word;
import de.anomic.plasma.parser.Condenser;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.server.serverObjects;
@ -230,15 +230,15 @@ public class yacysearch {
plasmaSearchEvent.cleanupEvents(true);
}
final plasmaSearchRankingProfile ranking = sb.getRanking();
final RankingProfile ranking = sb.getRanking();
if (querystring.indexOf("NEAR") >= 0) {
querystring = querystring.replace("NEAR", "");
ranking.coeff_worddistance = plasmaSearchRankingProfile.COEFF_MAX;
ranking.coeff_worddistance = RankingProfile.COEFF_MAX;
}
if (querystring.indexOf("RECENT") >= 0) {
querystring = querystring.replace("RECENT", "");
ranking.coeff_date = plasmaSearchRankingProfile.COEFF_MAX;
ranking.coeff_date = RankingProfile.COEFF_MAX;
}
int lrp = querystring.indexOf("LANGUAGE:");
String lr = "";

View File

@ -37,19 +37,19 @@ import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.text.referencePrototype.WordReferenceVars;
import de.anomic.kelondro.util.ScoreCluster;
import de.anomic.plasma.plasmaSearchRankingProcess;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.parser.Condenser;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverProcessor;
import de.anomic.yacy.yacyURL;
public class ReferenceOrder {
private WordReferenceVars min, max;
private final plasmaSearchRankingProfile ranking;
private final RankingProfile ranking;
private final ScoreCluster<String> doms; // collected for "authority" heuristic
private int maxdomcount;
private String language;
public ReferenceOrder(final plasmaSearchRankingProfile profile, String language) {
public ReferenceOrder(final RankingProfile profile, String language) {
this.min = null;
this.max = null;
this.ranking = profile;

View File

@ -54,6 +54,7 @@ import de.anomic.plasma.parser.Condenser;
import de.anomic.plasma.plasmaSearchRankingProcess.NavigatorEntry;
import de.anomic.plasma.plasmaSnippetCache.MediaSnippet;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverProfiling;
import de.anomic.yacy.yacySearch;
import de.anomic.yacy.yacySeed;
@ -460,7 +461,7 @@ public final class plasmaSearchEvent {
public static plasmaSearchEvent getEvent(
final Query query,
final plasmaSearchRankingProfile ranking,
final RankingProfile ranking,
final Segment indexSegment,
final yacySeedDB peers,
final ResultURLs crawlResults,

View File

@ -164,6 +164,7 @@ import de.anomic.plasma.parser.ParserException;
import de.anomic.plasma.parser.Word;
import de.anomic.plasma.parser.Condenser;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverBusyThread;
import de.anomic.server.serverCore;
@ -1018,10 +1019,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
return null;
}
public plasmaSearchRankingProfile getRanking() {
public RankingProfile getRanking() {
return (getConfig("rankingProfile", "").length() == 0) ?
new plasmaSearchRankingProfile(Query.CONTENTDOM_TEXT) :
new plasmaSearchRankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
new RankingProfile(Query.CONTENTDOM_TEXT) :
new RankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
}
public boolean onlineCaution() {

View File

@ -32,7 +32,6 @@ import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.util.SetTools;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.parser.Word;
import de.anomic.plasma.parser.Condenser;
import de.anomic.yacy.yacySeed;
@ -72,7 +71,7 @@ public final class Query {
public Bitfield constraint;
public boolean allofconstraint;
public boolean onlineSnippetFetch;
public plasmaSearchRankingProfile ranking;
public RankingProfile ranking;
public String host; // this is the client host that starts the query, not a site operator
public String sitehash; // this is a domain hash, 6 bytes long or null
public String authorhash;
@ -85,7 +84,7 @@ public final class Query {
public Query(final String queryString,
final int lines,
final plasmaSearchRankingProfile ranking,
final RankingProfile ranking,
final Bitfield constraint) {
if ((queryString.length() == 12) && (Base64Order.enhancedCoder.wellformed(queryString.getBytes()))) {
this.queryString = null;
@ -126,7 +125,7 @@ public final class Query {
final String queryString, final TreeSet<byte[]> queryHashes,
final TreeSet<byte[]> excludeHashes,
final TreeSet<byte[]> fullqueryHashes,
final plasmaSearchRankingProfile ranking,
final RankingProfile ranking,
final int maxDistance, final String prefer, final int contentdom,
final String language,
final String navigators,

View File

@ -1,4 +1,4 @@
// plasmaSearchRankingProfile.java
// RankingProfile.java
// -------------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@yacy.net
@ -20,7 +20,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.plasma;
package de.anomic.search;
import java.util.HashMap;
import java.util.Iterator;
@ -28,7 +28,7 @@ import java.util.Map;
import de.anomic.search.Query;
public class plasmaSearchRankingProfile {
public class RankingProfile {
// pre-sort attributes
public static final String DOMLENGTH = "domlength";
@ -79,7 +79,7 @@ public class plasmaSearchRankingProfile {
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer,
coeff_termfrequency, coeff_language;
public plasmaSearchRankingProfile(final int mediatype) {
public RankingProfile(final int mediatype) {
// set default-values
coeff_domlength = 11;
coeff_ybr = 9;
@ -115,7 +115,7 @@ public class plasmaSearchRankingProfile {
coeff_language = 13;
}
public plasmaSearchRankingProfile(final String prefix, final String profile) {
public RankingProfile(final String prefix, final String profile) {
this(Query.CONTENTDOM_TEXT); // set defaults
if ((profile != null) && (profile.length() > 0)) {
//parse external form

View File

@ -79,11 +79,11 @@ import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.util.ByteBuffer;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaSearchRankingProcess;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSwitchboardConstants;
import de.anomic.plasma.parser.Word;
import de.anomic.search.RankingProfile;
import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.tools.crypt;
@ -438,7 +438,7 @@ public final class yacyClient {
final plasmaSearchRankingProcess containerCache,
final Map<String, TreeMap<String, String>> abstractCache,
final Blacklist blacklist,
final plasmaSearchRankingProfile rankingProfile,
final RankingProfile rankingProfile,
final Bitfield constraint
) {
// send a search request to peer with remote Hash

View File

@ -37,8 +37,8 @@ import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.text.Segment;
import de.anomic.kelondro.util.ScoreCluster;
import de.anomic.plasma.plasmaSearchRankingProcess;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.search.Query;
import de.anomic.search.RankingProfile;
import de.anomic.yacy.dht.PeerSelection;
import de.anomic.yacy.logging.Log;
@ -54,7 +54,7 @@ public class yacySearch extends Thread {
final private yacySeed targetPeer;
private String[] urls;
private final int count, maxDistance;
final private plasmaSearchRankingProfile rankingProfile;
final private RankingProfile rankingProfile;
final private String prefer, filter, language;
final private Bitfield constraint;
final private yacySeedDB peers;
@ -76,7 +76,7 @@ public class yacySearch extends Thread {
final plasmaSearchRankingProcess containerCache,
final Map<String, TreeMap<String, String>> abstractCache,
final Blacklist blacklist,
final plasmaSearchRankingProfile rankingProfile,
final RankingProfile rankingProfile,
final Bitfield constraint) {
super("yacySearch_" + targetPeer.getName());
//System.out.println("DEBUG - yacySearch thread " + this.getName() + " initialized " + ((urlhashes.length() == 0) ? "(primary)" : "(secondary)"));
@ -257,7 +257,7 @@ public class yacySearch extends Thread {
final Map<String, TreeMap<String, String>> abstractCache,
int targets,
final Blacklist blacklist,
final plasmaSearchRankingProfile rankingProfile,
final RankingProfile rankingProfile,
final Bitfield constraint,
final TreeMap<byte[], String> clusterselection) {
// check own peer status
@ -298,7 +298,7 @@ public class yacySearch extends Thread {
final ResultURLs crawlResults,
final plasmaSearchRankingProcess containerCache,
final String targethash, final Blacklist blacklist,
final plasmaSearchRankingProfile rankingProfile,
final RankingProfile rankingProfile,
final Bitfield constraint, final TreeMap<byte[], String> clusterselection) {
assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes;