diff --git a/htroot/Ranking_p.java b/htroot/Ranking_p.java index c883b9115..98170e5b7 100644 --- a/htroot/Ranking_p.java +++ b/htroot/Ranking_p.java @@ -29,11 +29,11 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Map.Entry; +import net.yacy.cora.document.Classification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.tools.crypt; @@ -151,7 +151,7 @@ public class Ranking_p { // we create empty entries for template strings final serverObjects prop = defaultValues(); final RankingProfile ranking; - if (sb == null) ranking = new RankingProfile(ContentDomain.TEXT); + if (sb == null) ranking = new RankingProfile(Classification.ContentDomain.TEXT); else ranking = sb.getRanking(); putRanking(prop, ranking, "local"); return prop; @@ -168,7 +168,7 @@ public class Ranking_p { if (post.containsKey("ResetRanking")) { sb.setConfig("rankingProfile", ""); - final RankingProfile ranking = new RankingProfile(ContentDomain.TEXT); + final RankingProfile ranking = new RankingProfile(Classification.ContentDomain.TEXT); final serverObjects prop = defaultValues(); //prop.putAll(ranking.toExternalMap("local")); putRanking(prop, ranking, "local"); diff --git a/htroot/index.java b/htroot/index.java index 49cfcceab..3d3a54773 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -29,10 +29,11 @@ // if the shell's current path is HTROOT +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; -import net.yacy.search.snippet.ContentDomain; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -79,7 +80,7 @@ public class index { global = global && indexReceiveGranted; // search domain - ContentDomain contentdom = ContentDomain.TEXT; + Classification.ContentDomain contentdom = ContentDomain.TEXT; final String cds = (post == null) ? "text" : post.get("contentdom", "text"); if (cds.equals("text")) contentdom = ContentDomain.TEXT; if (cds.equals("audio")) contentdom = ContentDomain.AUDIO; diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 354b0b403..bb6265536 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -38,6 +38,8 @@ import java.util.TreeSet; import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; @@ -70,7 +72,6 @@ import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.ResultEntry; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; @@ -211,7 +212,7 @@ public final class search { final long timestamp = System.currentTimeMillis(); // prepare a search profile - final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile); + final RankingProfile rankingProfile = (profile.length() == 0) ? new RankingProfile(Classification.ContentDomain.contentdomParser(contentdom)) : new RankingProfile("", profile); // prepare an abstract result final StringBuilder indexabstract = new StringBuilder(6000); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 7635a3f45..c4ff6c123 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -39,6 +39,8 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.Domains; @@ -76,7 +78,6 @@ import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import de.anomic.data.DidYouMean; import de.anomic.data.UserDB; import de.anomic.data.ymark.YMarkTables; @@ -121,7 +122,7 @@ public class yacysearch { //get focus option final boolean focus = (post == null) ? true : post.get("focus", "1").equals("1"); prop.put("focus", focus ? 1 : 0); - + // produce vocabulary navigation sidebars Collection vocabularies = LibraryProvider.autotagging.getVocabularies(); int j = 0; @@ -270,7 +271,7 @@ public class yacysearch { } // find search domain - final ContentDomain contentdom = + final Classification.ContentDomain contentdom = ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text")); // patch until better search profiles are available diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index d6a5dce32..93577644a 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -30,6 +30,8 @@ import java.util.List; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader.FileType; @@ -45,7 +47,6 @@ import net.yacy.search.SwitchboardConstants; import net.yacy.search.query.QueryParams; import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; -import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.MediaSnippet; import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.TextSnippet; @@ -104,7 +105,7 @@ public class yacysearchitem { prop.put("navurlBase", QueryParams.navurlBase("html", theQuery, null, theQuery.urlMask.toString(), theQuery.navigators).toString()); final String target_special_pattern = sb.getConfig(SwitchboardConstants.SEARCH_TARGET_SPECIAL_PATTERN, ""); - if (theQuery.contentdom == ContentDomain.TEXT) { + if (theQuery.contentdom == Classification.ContentDomain.TEXT) { // text search // generate result object @@ -210,7 +211,7 @@ public class yacysearchitem { return prop; } - if (theQuery.contentdom == ContentDomain.IMAGE) { + if (theQuery.contentdom == Classification.ContentDomain.IMAGE) { // image search; shows thumbnails prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content diff --git a/source/net/yacy/cora/document/Classification.java b/source/net/yacy/cora/document/Classification.java index f619430a9..c70f4b1e0 100644 --- a/source/net/yacy/cora/document/Classification.java +++ b/source/net/yacy/cora/document/Classification.java @@ -35,6 +35,46 @@ public class Classification { private static final Set videoExtSet = new HashSet(); private static final Set appsExtSet = new HashSet(); + public enum ContentDomain { + + ALL(-1), + TEXT(0), + IMAGE(1), + AUDIO(2), + VIDEO(3), + APP(4); + + private final int code; + + ContentDomain(int code) { + this.code = code; + } + + public int getCode() { + return this.code; + } + + public static ContentDomain contentdomParser(final String dom) { + if ("all".equals(dom)) return ALL; + else if ("text".equals(dom)) return TEXT; + else if ("image".equals(dom)) return IMAGE; + else if ("audio".equals(dom)) return AUDIO; + else if ("video".equals(dom)) return VIDEO; + else if ("app".equals(dom)) return APP; + return TEXT; + } + + @Override + public String toString() { + if (this == ALL) return "all"; + else if (this == TEXT) return "text"; + else if (this == IMAGE) return "image"; + else if (this == AUDIO) return "audio"; + else if (this == VIDEO) return "video"; + else if (this == APP) return "app"; + return "text"; + } + } static { diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index c795c83d6..6de5f4a00 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -62,6 +62,7 @@ import java.util.regex.Pattern; import net.yacy.migration; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; import net.yacy.cora.document.JSONArray; import net.yacy.cora.document.JSONException; import net.yacy.cora.document.JSONObject; @@ -103,7 +104,6 @@ import net.yacy.search.query.QueryParams; import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.TextSnippet; import org.apache.http.entity.mime.content.ContentBody; @@ -1524,7 +1524,7 @@ public final class Protocol "", args[1], null, //secondarySearchSuperviser, - new RankingProfile(ContentDomain.TEXT), // rankingProfile, + new RankingProfile(Classification.ContentDomain.TEXT), // rankingProfile, null // constraint); ); for ( final URIMetadataRow link : result.links ) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 53abb613a..291c83cbc 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -76,6 +76,7 @@ import java.util.zip.ZipInputStream; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; @@ -149,7 +150,6 @@ import net.yacy.search.query.SearchEvent; import net.yacy.search.query.SearchEventCache; import net.yacy.search.ranking.BlockRank; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import de.anomic.crawler.Cache; import de.anomic.crawler.CrawlProfile; import de.anomic.crawler.CrawlQueues; @@ -1462,7 +1462,7 @@ public final class Switchboard extends serverSwitch public RankingProfile getRanking() { return (getConfig("rankingProfile", "").length() == 0) - ? new RankingProfile(ContentDomain.TEXT) + ? new RankingProfile(Classification.ContentDomain.TEXT) : new RankingProfile("", crypt.simpleDecode(sb.getConfig("rankingProfile", ""), null)); } diff --git a/source/net/yacy/search/index/DocumentIndex.java b/source/net/yacy/search/index/DocumentIndex.java index c6081b3e4..e74df82a8 100644 --- a/source/net/yacy/search/index/DocumentIndex.java +++ b/source/net/yacy/search/index/DocumentIndex.java @@ -34,6 +34,7 @@ import java.util.Date; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; +import net.yacy.cora.document.Classification; import net.yacy.cora.document.UTF8; import net.yacy.document.Condenser; import net.yacy.document.Document; @@ -47,7 +48,6 @@ import net.yacy.search.query.RWIProcess; import net.yacy.search.query.SearchEvent; import net.yacy.search.ranking.RankingProfile; import net.yacy.search.ranking.ReferenceOrder; -import net.yacy.search.snippet.ContentDomain; /** * convenience class to access the yacycore library from outside of yacy to put files into the index @@ -57,7 +57,7 @@ import net.yacy.search.snippet.ContentDomain; public class DocumentIndex extends Segment { - private static final RankingProfile textRankingDefault = new RankingProfile(ContentDomain.TEXT); + private static final RankingProfile textRankingDefault = new RankingProfile(Classification.ContentDomain.TEXT); //private Bitfield zeroConstraint = new Bitfield(4); private static DigestURI poison; diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index 052f70062..ec2dd8ccb 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -40,6 +40,8 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.services.federated.yacy.CacheStrategy; @@ -59,7 +61,6 @@ import net.yacy.kelondro.util.SetTools; import net.yacy.peers.Seed; import net.yacy.search.index.Segment; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; public final class QueryParams { @@ -112,7 +113,7 @@ public final class QueryParams { public int offset; public final Pattern urlMask, prefer; public final boolean urlMask_isCatchall, prefer_isMatchnothing; - public final ContentDomain contentdom; + public final Classification.ContentDomain contentdom; public final String targetlang; public final Collection metatags; public final String navigators; @@ -290,10 +291,6 @@ public final class QueryParams { this.offset = newOffset; } - public String contentdom() { - return this.contentdom.toString(); - } - public boolean isLocal() { return this.domType == Searchdom.LOCAL; } @@ -588,7 +585,7 @@ public final class QueryParams { sb.append(ampersand); sb.append("contentdom="); - sb.append(theQuery.contentdom()); + sb.append(theQuery.contentdom.toString()); sb.append(ampersand); sb.append("former="); diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index 104cacca1..dce9fcf8e 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -39,6 +39,8 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.protocol.Scanner; import net.yacy.cora.sorting.ClusteredScoreMap; @@ -65,7 +67,6 @@ import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; import net.yacy.search.ranking.ReferenceOrder; -import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.ResultEntry; public final class RWIProcess extends Thread @@ -288,7 +289,7 @@ public final class RWIProcess extends Thread } // check document domain - if ( this.query.contentdom != ContentDomain.TEXT ) { + if ( this.query.contentdom != Classification.ContentDomain.TEXT ) { if ( (this.query.contentdom == ContentDomain.AUDIO) && (!(iEntry.flags().get(Condenser.flag_cat_hasaudio))) ) { continue pollloop; diff --git a/source/net/yacy/search/query/SearchEventCache.java b/source/net/yacy/search/query/SearchEventCache.java index af632f0e0..a5491067f 100644 --- a/source/net/yacy/search/query/SearchEventCache.java +++ b/source/net/yacy/search/query/SearchEventCache.java @@ -32,6 +32,7 @@ import java.util.SortedMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import net.yacy.cora.document.Classification; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.MemoryControl; import net.yacy.peers.SeedDB; @@ -40,7 +41,6 @@ import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; import net.yacy.search.ranking.RankingProfile; -import net.yacy.search.snippet.ContentDomain; import de.anomic.data.WorkTables; public class SearchEventCache { @@ -116,7 +116,7 @@ public class SearchEventCache { private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) { Log.logWarning("SearchEventCache", "returning dummy event"); if (dummyEvent != null) return dummyEvent; - final QueryParams query = new QueryParams("", 0, null, indexSegment, new RankingProfile(ContentDomain.TEXT), ""); + final QueryParams query = new QueryParams("", 0, null, indexSegment, new RankingProfile(Classification.ContentDomain.TEXT), ""); dummyEvent = new SearchEvent(query, null, workTables, null, false, loader, 0, 0, 0, 0, false); return dummyEvent; } diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index 39898a502..745b113b2 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.regex.Pattern; import net.yacy.cora.document.ASCII; +import net.yacy.cora.document.Classification; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.services.federated.solr.SolrConnector; @@ -52,7 +53,6 @@ import net.yacy.peers.SeedDB; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; -import net.yacy.search.snippet.ContentDomain; import net.yacy.search.snippet.MediaSnippet; import net.yacy.search.snippet.ResultEntry; import net.yacy.search.snippet.TextSnippet; @@ -293,23 +293,15 @@ public class SnippetProcess { long r = 0; // for media search: prefer pages with many links - if (this.query.contentdom == ContentDomain.IMAGE) { - r += rentry.limage() << this.query.ranking.coeff_cathasimage; - } - if (this.query.contentdom == ContentDomain.AUDIO) { - r += rentry.laudio() << this.query.ranking.coeff_cathasaudio; - } - if (this.query.contentdom == ContentDomain.VIDEO) { - r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo; - } - if (this.query.contentdom == ContentDomain.APP ) { - r += rentry.lapp() << this.query.ranking.coeff_cathasapp; - } + r += rentry.limage() << this.query.ranking.coeff_cathasimage; + r += rentry.laudio() << this.query.ranking.coeff_cathasaudio; + r += rentry.lvideo() << this.query.ranking.coeff_cathasvideo; + r += rentry.lapp() << this.query.ranking.coeff_cathasapp; // apply citation count //System.out.println("POSTRANKING CITATION: references = " + rentry.referencesCount() + ", inbound = " + rentry.llocal() + ", outbound = " + rentry.lother()); r += (128 * rentry.referencesCount() / (1 + 2 * rentry.llocal() + rentry.lother())) << this.query.ranking.coeff_citation; - + // prefer hit with 'prefer' pattern if (this.query.prefer.matcher(rentry.url().toNormalform(true, true)).matches()) { r += 256 << this.query.ranking.coeff_prefer; @@ -588,7 +580,7 @@ public class SnippetProcess { } // load snippet - if (this.query.contentdom == ContentDomain.TEXT) { + if (this.query.contentdom == Classification.ContentDomain.TEXT) { // attach text snippet startTime = System.currentTimeMillis(); final TextSnippet snippet = new TextSnippet( diff --git a/source/net/yacy/search/ranking/RankingProfile.java b/source/net/yacy/search/ranking/RankingProfile.java index 53dfa2d21..42d5f51e8 100644 --- a/source/net/yacy/search/ranking/RankingProfile.java +++ b/source/net/yacy/search/ranking/RankingProfile.java @@ -30,8 +30,9 @@ import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; +import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.kelondro.logging.Log; -import net.yacy.search.snippet.ContentDomain; public class RankingProfile { @@ -85,7 +86,7 @@ public class RankingProfile { coeff_urlcompintoplist, coeff_descrcompintoplist, coeff_prefer, coeff_termfrequency, coeff_language, coeff_citation; - public RankingProfile(final ContentDomain mediatype) { + public RankingProfile(final Classification.ContentDomain mediatype) { // set default-values this.coeff_appemph = 5; this.coeff_appurl = 11; diff --git a/source/net/yacy/search/snippet/ContentDomain.java b/source/net/yacy/search/snippet/ContentDomain.java deleted file mode 100644 index d2039c3fe..000000000 --- a/source/net/yacy/search/snippet/ContentDomain.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * ContentDomain - * Copyright 2011 by Michael Christen - * First released 18.05.2011 at http://yacy.net - * - * $LastChangedDate$ - * $LastChangedRevision$ - * $LastChangedBy$ - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program in the file lgpl21.txt - * If not, see . - */ - -package net.yacy.search.snippet; - -public enum ContentDomain { - - ALL(-1), - TEXT(0), - IMAGE(1), - AUDIO(2), - VIDEO(3), - APP(4); - - private int code; - - ContentDomain(int code) { - this.code = code; - } - - public int getCode() { - return this.code; - } - - public static ContentDomain contentdomParser(final String dom) { - if ("all".equals(dom)) return ALL; - else if ("text".equals(dom)) return TEXT; - else if ("image".equals(dom)) return IMAGE; - else if ("audio".equals(dom)) return AUDIO; - else if ("video".equals(dom)) return VIDEO; - else if ("app".equals(dom)) return APP; - return TEXT; - } - - @Override - public String toString() { - if (this == ALL) return "all"; - else if (this == TEXT) return "text"; - else if (this == IMAGE) return "image"; - else if (this == AUDIO) return "audio"; - else if (this == VIDEO) return "video"; - else if (this == APP) return "app"; - return "text"; - } -} diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java index ccc8e5d2f..379f02733 100644 --- a/source/net/yacy/search/snippet/MediaSnippet.java +++ b/source/net/yacy/search/snippet/MediaSnippet.java @@ -37,6 +37,7 @@ import java.util.TreeSet; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; +import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.services.federated.yacy.CacheStrategy; import net.yacy.document.Document; @@ -127,7 +128,7 @@ public class MediaSnippet implements Comparable, Comparator retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final ContentDomain mediatype, final CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) { + public static List retrieveMediaSnippets(final DigestURI url, final HandleSet queryhashes, final Classification.ContentDomain mediatype, final CacheStrategy cacheStrategy, final int timeout, final boolean reindexing) { if (queryhashes.isEmpty()) { Log.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList();