// MediaSnippet.java // ----------------- // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2005 // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package de.anomic.search; import java.util.ArrayList; import java.util.Iterator; import java.util.Map; import java.util.TreeSet; import net.yacy.kelondro.logging.Log; import de.anomic.crawler.retrieval.LoaderDispatcher; import de.anomic.document.Document; import de.anomic.document.parser.html.ImageEntry; import de.anomic.yacy.yacyURL; public class MediaSnippet { public int type; public yacyURL href, source; public String name, attr; public int ranking; public MediaSnippet(final int type, final yacyURL href, final String name, final String attr, final int ranking, final yacyURL source) { this.type = type; this.href = href; this.source = source; // the web page where the media resource appeared this.name = name; this.attr = attr; this.ranking = ranking; // the smaller the better! small values should be shown first if ((this.name == null) || (this.name.length() == 0)) this.name = "_"; if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_"; } @Override public int hashCode() { return href.hashCode(); } public static ArrayList retrieveMediaSnippets(final yacyURL url, final TreeSet queryhashes, final int mediatype, final boolean fetchOnline, final int timeout, final boolean reindexing) { if (queryhashes.size() == 0) { Log.logFine("snippet fetch", "no query hashes given for url " + url); return new ArrayList(); } final Document document = LoaderDispatcher.retrieveDocument(url, fetchOnline, timeout, false, reindexing); final ArrayList a = new ArrayList(); if (document != null) { if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_AUDIO)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_AUDIO)); if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_VIDEO)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_VIDEO)); if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_APP)) a.addAll(computeMediaSnippets(document, queryhashes, QueryParams.CONTENTDOM_APP)); if ((mediatype == QueryParams.CONTENTDOM_ALL) || (mediatype == QueryParams.CONTENTDOM_IMAGE)) a.addAll(computeImageSnippets(document, queryhashes)); } return a; } public static ArrayList computeMediaSnippets(final Document document, final TreeSet queryhashes, final int mediatype) { if (document == null) return new ArrayList(); Map media = null; if (mediatype == QueryParams.CONTENTDOM_AUDIO) media = document.getAudiolinks(); else if (mediatype == QueryParams.CONTENTDOM_VIDEO) media = document.getVideolinks(); else if (mediatype == QueryParams.CONTENTDOM_APP) media = document.getApplinks(); if (media == null) return null; final Iterator> i = media.entrySet().iterator(); Map.Entry entry; yacyURL url; String desc; TreeSet s; final ArrayList result = new ArrayList(); while (i.hasNext()) { entry = i.next(); url = entry.getKey(); desc = entry.getValue(); s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); if (s.size() == 0) { result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source())); continue; } s = TextSnippet.removeAppearanceHashes(desc, s); if (s.size() == 0) { result.add(new MediaSnippet(mediatype, url, desc, null, 0, document.dc_source())); continue; } } return result; } public static ArrayList computeImageSnippets(final Document document, final TreeSet queryhashes) { final TreeSet images = new TreeSet(); images.addAll(document.getImages().values()); // iterates images in descending size order! // a measurement for the size of the images can be retrieved using the htmlFilterImageEntry.hashCode() final Iterator i = images.iterator(); ImageEntry ientry; yacyURL url; String desc; TreeSet s; final ArrayList result = new ArrayList(); while (i.hasNext()) { ientry = i.next(); url = ientry.url(); desc = ientry.alt(); s = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes); if (s.size() == 0) { final int ranking = ientry.hashCode(); result.add(new MediaSnippet(QueryParams.CONTENTDOM_IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source())); continue; } s = TextSnippet.removeAppearanceHashes(desc, s); if (s.size() == 0) { final int ranking = ientry.hashCode(); result.add(new MediaSnippet(QueryParams.CONTENTDOM_IMAGE, url, desc, ientry.width() + " x " + ientry.height(), ranking, document.dc_source())); continue; } } return result; } /* private static String computeMediaSnippet(Map media, Set queryhashes) { Iterator> i = media.entrySet().iterator(); Map.Entry entry; yacyURL url; String desc; Set s; String result = ""; while (i.hasNext()) { entry = i.next(); url = entry.getKey(); desc = entry.getValue(); s = removeAppearanceHashes(url.toNormalform(false, false), queryhashes); if (s.size() == 0) { result += "
" + ((desc.length() == 0) ? url : desc) + ""; continue; } s = removeAppearanceHashes(desc, s); if (s.size() == 0) { result += "
" + ((desc.length() == 0) ? url : desc) + ""; continue; } } if (result.length() == 0) return null; return result.substring(6); } */ }