refactoring of ContentDomain: now subclass of Classification

This commit is contained in:
Michael Peter Christen 2012-04-22 00:04:36 +02:00
parent 8a08c96a82
commit 14f67f217c
16 changed files with 84 additions and 114 deletions

View File

@ -29,11 +29,11 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import net.yacy.cora.document.Classification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -151,7 +151,7 @@ public class Ranking_p {
// we create empty entries for template strings
final serverObjects prop = defaultValues();
final RankingProfile ranking;
if (sb == null) ranking = new RankingProfile(ContentDomain.TEXT);
if (sb == null) ranking = new RankingProfile(Classification.ContentDomain.TEXT);
else ranking = sb.getRanking();
putRanking(prop, ranking, "local");
return prop;
@ -168,7 +168,7 @@ public class Ranking_p {
if (post.containsKey("ResetRanking")) {
sb.setConfig("rankingProfile", "");
final RankingProfile ranking = new RankingProfile(ContentDomain.TEXT);
final RankingProfile ranking = new RankingProfile(Classification.ContentDomain.TEXT);
final serverObjects prop = defaultValues();
//prop.putAll(ranking.toExternalMap("local"));
putRanking(prop, ranking, "local");

View File

@ -29,10 +29,11 @@
// if the shell's current path is HTROOT
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.snippet.ContentDomain;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -79,7 +80,7 @@ public class index {
global = global && indexReceiveGranted;
// search domain
ContentDomain contentdom = ContentDomain.TEXT;
Classification.ContentDomain contentdom = ContentDomain.TEXT;
final String cds = (post == null) ? "text" : post.get("contentdom", "text");
if (cds.equals("text")) contentdom = ContentDomain.TEXT;
if (cds.equals("audio")) contentdom = ContentDomain.AUDIO;

View File

@ -38,6 +38,8 @@ import java.util.TreeSet;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
@ -70,7 +72,6 @@ import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.ResultEntry;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
@ -211,7 +212,7 @@ public final class search {
final long timestamp = System.currentTimeMillis();
// prepare a search profile
final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);
final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(Classification.ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile);
// prepare an abstract result
final StringBuilder indexabstract = new StringBuilder(6000);

View File

@ -39,6 +39,8 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.Domains;
@ -76,7 +78,6 @@ import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import de.anomic.data.DidYouMean;
import de.anomic.data.UserDB;
import de.anomic.data.ymark.YMarkTables;
@ -270,7 +271,7 @@ public class yacysearch {
}
// find search domain
final ContentDomain contentdom =
final Classification.ContentDomain contentdom =
ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
// patch until better search profiles are available

View File

@ -30,6 +30,8 @@ import java.util.List;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.RequestHeader.FileType;
@ -45,7 +47,6 @@ import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;
@ -104,7 +105,7 @@ public class yacysearchitem {
prop.put("navurlBase", QueryParams.navurlBase("html", theQuery, null, theQuery.urlMask.toString(), theQuery.navigators).toString());
final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, "");
if (theQuery.contentdom == ContentDomain.TEXT) {
if (theQuery.contentdom == Classification.ContentDomain.TEXT) {
// text search
// generate result object
@ -210,7 +211,7 @@ public class yacysearchitem {
return prop;
}
if (theQuery.contentdom == ContentDomain.IMAGE) {
if (theQuery.contentdom == Classification.ContentDomain.IMAGE) {
// image search; shows thumbnails
prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content

View File

@ -35,6 +35,46 @@ public class Classification {
private static final Set<String> videoExtSet = new HashSet<String>();
private static final Set<String> appsExtSet = new HashSet<String>();
/**
 * Content domain of a search. Each constant carries a numeric code
 * (see {@link #getCode()}) and a lower-case textual name
 * (see {@link #toString()}).
 */
public enum ContentDomain {

    ALL(-1),
    TEXT(0),
    IMAGE(1),
    AUDIO(2),
    VIDEO(3),
    APP(4);

    /** numeric code handed to the constructor of this constant */
    private final int code;

    ContentDomain(int code) {
        this.code = code;
    }

    /**
     * @return the numeric code of this constant (-1 for ALL, 0..4 for the others)
     */
    public int getCode() {
        return this.code;
    }

    /**
     * Translate a textual name into its constant.
     * The comparison is exact (case-sensitive) against the names
     * produced by {@link #toString()}.
     *
     * @param dom the textual name, i.e. "all", "text", "image", "audio", "video" or "app"
     * @return the constant with that name; TEXT if the name is null or not recognized
     */
    public static ContentDomain contentdomParser(final String dom) {
        for (final ContentDomain candidate : values()) {
            if (candidate.toString().equals(dom)) {
                return candidate;
            }
        }
        // unknown or missing names fall back to the text domain
        return TEXT;
    }

    /**
     * @return the lower-case textual name of this constant
     */
    @Override
    public String toString() {
        switch (this) {
            case ALL:
                return "all";
            case IMAGE:
                return "image";
            case AUDIO:
                return "audio";
            case VIDEO:
                return "video";
            case APP:
                return "app";
            case TEXT:
            default:
                return "text";
        }
    }
}
static {

View File

@ -62,6 +62,7 @@ import java.util.regex.Pattern;
import net.yacy.migration;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.JSONArray;
import net.yacy.cora.document.JSONException;
import net.yacy.cora.document.JSONObject;
@ -103,7 +104,6 @@ import net.yacy.search.query.QueryParams;
import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.TextSnippet;
import org.apache.http.entity.mime.content.ContentBody;
@ -1524,7 +1524,7 @@ public final class Protocol
"",
args[1],
null, //secondarySearchSuperviser,
new RankingProfile(ContentDomain.TEXT), // rankingProfile,
new RankingProfile(Classification.ContentDomain.TEXT), // rankingProfile,
null // constraint);
);
for ( final URIMetadataRow link : result.links ) {

View File

@ -76,6 +76,7 @@ import java.util.zip.ZipInputStream;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
@ -149,7 +150,6 @@ import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.BlockRank;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import de.anomic.crawler.Cache;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlQueues;
@ -1462,7 +1462,7 @@ public final class Switchboard extends serverSwitch
public RankingProfile getRanking() {
return (getConfig("rankingProfile", "").length() == 0)
? new RankingProfile(ContentDomain.TEXT)
? new RankingProfile(Classification.ContentDomain.TEXT)
: new RankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null));
}

View File

@ -34,6 +34,7 @@ import java.util.Date;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.UTF8;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
@ -47,7 +48,6 @@ import net.yacy.search.query.RWIProcess;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ContentDomain;
/**
* convenience class to access the yacycore library from outside of yacy to put files into the index
@ -57,7 +57,7 @@ import net.yacy.search.snippet.ContentDomain;
public class DocumentIndex extends Segment
{
private static final RankingProfile textRankingDefault = new RankingProfile(ContentDomain.TEXT);
private static final RankingProfile textRankingDefault = new RankingProfile(Classification.ContentDomain.TEXT);
//private Bitfield zeroConstraint = new Bitfield(4);
private static DigestURI poison;

View File

@ -40,6 +40,8 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
@ -59,7 +61,6 @@ import net.yacy.kelondro.util.SetTools;
import net.yacy.peers.Seed;
import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
public final class QueryParams {
@ -112,7 +113,7 @@ public final class QueryParams {
public int offset;
public final Pattern urlMask, prefer;
public final boolean urlMask_isCatchall, prefer_isMatchnothing;
public final ContentDomain contentdom;
public final Classification.ContentDomain contentdom;
public final String targetlang;
public final Collection<Autotagging.Metatag> metatags;
public final String navigators;
@ -290,10 +291,6 @@ public final class QueryParams {
this.offset = newOffset;
}
public String contentdom() {
return this.contentdom.toString();
}
public boolean isLocal() {
return this.domType == Searchdom.LOCAL;
}
@ -588,7 +585,7 @@ public final class QueryParams {
sb.append(ampersand);
sb.append("contentdom=");
sb.append(theQuery.contentdom());
sb.append(theQuery.contentdom.toString());
sb.append(ampersand);
sb.append("former=");

View File

@ -39,6 +39,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Scanner;
import net.yacy.cora.sorting.ClusteredScoreMap;
@ -65,7 +67,6 @@ import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
import net.yacy.search.ranking.ReferenceOrder;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.ResultEntry;
public final class RWIProcess extends Thread
@ -288,7 +289,7 @@ public final class RWIProcess extends Thread
}
// check document domain
if ( this.query.contentdom != ContentDomain.TEXT ) {
if ( this.query.contentdom != Classification.ContentDomain.TEXT ) {
if ( (this.query.contentdom == ContentDomain.AUDIO)
&& (!(iEntry.flags().get(Condenser.flag_cat_hasaudio))) ) {
continue pollloop;

View File

@ -32,6 +32,7 @@ import java.util.SortedMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import net.yacy.cora.document.Classification;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.SeedDB;
@ -40,7 +41,6 @@ import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import de.anomic.data.WorkTables;
public class SearchEventCache {
@ -116,7 +116,7 @@ public class SearchEventCache {
private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) {
Log.logWarning("SearchEventCache", "returning dummy event");
if (dummyEvent != null) return dummyEvent;
final QueryParams query = new QueryParams("", 0, null, indexSegment, new RankingProfile(ContentDomain.TEXT), "");
final QueryParams query = new QueryParams("", 0, null, indexSegment, new RankingProfile(Classification.ContentDomain.TEXT), "");
dummyEvent = new SearchEvent(query, null, workTables, null, false, loader, 0, 0, 0, 0, false);
return dummyEvent;
}

View File

@ -32,6 +32,7 @@ import java.util.List;
import java.util.regex.Pattern;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.services.federated.solr.SolrConnector;
@ -52,7 +53,6 @@ import net.yacy.peers.SeedDB;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.MediaSnippet;
import net.yacy.search.snippet.ResultEntry;
import net.yacy.search.snippet.TextSnippet;
@ -293,18 +293,10 @@ public class SnippetProcess {
long r = 0;
// for media search: prefer pages with many links
if (this.query.contentdom == ContentDomain.IMAGE) {
r += rentry.limage() << this.query.ranking.coeff_cathasimage;
}
if (this.query.contentdom == ContentDomain.AUDIO) {
r += rentry.laudio() << this.query.ranking.coeff_cathasaudio;
}
if (this.query.contentdom == ContentDomain.VIDEO) {
r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo;
}
if (this.query.contentdom == ContentDomain.APP ) {
r += rentry.lapp() << this.query.ranking.coeff_cathasapp;
}
// apply citation count
//System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother());
@ -588,7 +580,7 @@ public class SnippetProcess {
}
// load snippet
if (this.query.contentdom == ContentDomain.TEXT) {
if (this.query.contentdom == Classification.ContentDomain.TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
final TextSnippet snippet = new TextSnippet(

View File

@ -30,8 +30,9 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.snippet.ContentDomain;
public class RankingProfile {
@ -85,7 +86,7 @@ public class RankingProfile {
coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer,
coeff_termfrequency, coeff_language, coeff_citation;
public RankingProfile(final ContentDomain mediatype) {
public RankingProfile(final Classification.ContentDomain mediatype) {
// set default-values
this.coeff_appemph = 5;
this.coeff_appurl = 11;

View File

@ -1,66 +0,0 @@
/**
* ContentDomain
* Copyright 2011 by Michael Christen
* First released 18.05.2011 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.search.snippet;
/**
 * Content domain of a search. Each constant has a numeric code and a
 * lower-case textual name.
 */
public enum ContentDomain {
    ALL(-1),
    TEXT(0),
    IMAGE(1),
    AUDIO(2),
    VIDEO(3),
    APP(4);

    // enum constants are shared singletons, so the code must never change after construction
    private final int code;

    ContentDomain(int code) {
        this.code = code;
    }

    /**
     * @return the numeric code of this constant (-1 for ALL, 0..4 for the others)
     */
    public int getCode() {
        return this.code;
    }

    /**
     * Parse the textual name of a content domain.
     * Matching is exact and case-sensitive.
     *
     * @param dom one of "all", "text", "image", "audio", "video", "app"
     * @return the matching constant; TEXT if dom is null or not one of the names above
     */
    public static ContentDomain contentdomParser(final String dom) {
        if ("all".equals(dom)) return ALL;
        if ("text".equals(dom)) return TEXT;
        if ("image".equals(dom)) return IMAGE;
        if ("audio".equals(dom)) return AUDIO;
        if ("video".equals(dom)) return VIDEO;
        if ("app".equals(dom)) return APP;
        // literal-first equals() keeps a null argument on the fallback path
        return TEXT;
    }

    /**
     * @return the lower-case textual name of this constant, as accepted by
     *         {@link #contentdomParser(String)}
     */
    @Override
    public String toString() {
        if (this == ALL) return "all";
        if (this == TEXT) return "text";
        if (this == IMAGE) return "image";
        if (this == AUDIO) return "audio";
        if (this == VIDEO) return "video";
        if (this == APP) return "app";
        return "text";
    }
}

View File

@ -37,6 +37,7 @@ import java.util.TreeSet;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.document.Document;
@ -127,7 +128,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return o1.compareTo(o2);
}
public static List<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) {
public static List<MediaSnippet> retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final Classification.ContentDomain mediatype, final CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) {
if (queryhashes.isEmpty()) {
Log.logFine("snippet fetch", "no query hashes given for url " + url);
return new ArrayList<MediaSnippet>();