- Added a new RSS reader interface. It is not finished yet, but you can already load and inspect RSS feeds. It will be used to index RSS feeds in a way that is appropriate for this kind of data.

- Renamed the MediaWiki and phpBB3 loader interfaces (rename only, no functional change). - Removed two old, unused RSS loader interfaces. - Fixed a bug in the RSS parser library of cora. - Added a new RSS parser component to the set of YaCy document parsers. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7053 6c8d7289-2bf4-0310-a012-ef5d649a1542
2024-09-19 00:01:41 +02:00 · 2010-08-20 11:30:02 +00:00 · 2010-08-20 11:30:02 +00:00 · e10cd115a9
commit e10cd115a9
parent 933dc1a600
14 changed files with 324 additions and 233 deletions
--- a/htroot/FeedReader_p.html
+++ b/htroot/FeedReader_p.html
@ -1,39 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-  <head>
-    <title>YaCy '#[clientname]#': Feed Reader</title>
-    #%env/templates/metas.template%#
-  </head>
-<body>
-#%env/templates/header.template%#
-#(page)#
-please select your feed with ?url=Feedurl&max=5&offset=1 (to be implemented in html ;))
-::
-<dl>
-	<dt>Title</dt>
-	<dd>#[title]#</dd>
-	#(hasAuthor)#::<dt>Author</dt>
-	<dd>#[author]#</dd>#(/hasAuthor)#
-	<dt>Description</dt>
-	<dd>#[description]#</dd>
-</dl>
-
-<dl>
-#{items}#
-	<dt><a href="#[link]#">#[title]#</a></dt>
-	<dd style="border: thin solid red">#[description]#</dd>
-#{/items}#
-</dl>
-::
-
-Error:
-#(error)#
-You need to install libx
-::
-Problem with url
-#(/error)#
-test
-#(/page)#
-#%env/templates/footer.template%#
-</body>
-</html>
--- a/htroot/FeedReader_p.java
+++ b/htroot/FeedReader_p.java
@ -1,91 +0,0 @@
-//FeedReader_p.java
-//------------
-// part of YACY
-//
-// (C) 2007 Alexander Schier
-//
-//$LastChangedDate$
-//$LastChangedRevision$
-//$LastChangedBy$
-//
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-
-import net.yacy.cora.document.Hit;
-import net.yacy.cora.document.RSSFeed;
-import net.yacy.cora.document.RSSReader;
-import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.util.DateFormatter;
-
-import de.anomic.http.server.RequestHeader;
-import de.anomic.server.serverObjects;
-import de.anomic.server.serverSwitch;
-import de.anomic.server.servletProperties;
-
-// test url:
-// http://localhost:8080/FeedReader_p.html?url=http://www.tagesthemen.de/xml/rss2
-
-public class FeedReader_p {
-    
-    public static servletProperties respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
-        final servletProperties prop = new servletProperties();
-        
-        prop.put("page", "0");
-        if (post != null) {
-            DigestURI url;
-            try {
-                url = new DigestURI(post.get("url"), null);
-            } catch (final MalformedURLException e) {
-                prop.put("page", "2");
-                return prop;
-            }
-            
-            // int maxitems=Integer.parseInt(post.get("max", "0"));
-            // int offset=Integer.parseInt(post.get("offset", "0")); //offset to the first displayed item
-            try {
-                final RSSFeed feed = new RSSReader(url.toString()).getFeed();
-    
-                prop.putHTML("page_title", feed.getChannel().getTitle());
-                if (feed.getChannel().getAuthor() == null) {
-                    prop.put("page_hasAuthor", "0");
-                } else {
-                    prop.put("page_hasAuthor", "1");
-                    prop.putHTML("page_hasAuthor_author", feed.getChannel().getAuthor());
-                }
-                prop.putHTML("page_description", feed.getChannel().getDescription());
-    
-                int i = 0;
-                for (final Hit item: feed) {
-                    prop.putHTML("page_items_" + i + "_author", item.getAuthor());
-                    prop.putHTML("page_items_" + i + "_title", item.getTitle());
-                    prop.putHTML("page_items_" + i + "_link", item.getLink());
-                    prop.putHTML("page_items_" + i + "_description", item.getDescription());
-                    prop.putHTML("page_items_" + i + "_date", DateFormatter.formatShortSecond(item.getPubDate()));
-                    i++;
-                }
-                prop.put("page_items", feed.size());
-                prop.put("page", "1");
-            } catch (IOException e) {
-                Log.logException(e);
-            }
-        }
-    
-        // return rewrite properties
-        return prop;
-    }
-}
--- a/htroot/Load_MediawikiWiki.html
+++ b/htroot/Load_MediawikiWiki.html
--- a/htroot/Load_MediawikiWiki.java
+++ b/htroot/Load_MediawikiWiki.java
@ -30,7 +30,7 @@ import de.anomic.search.SwitchboardConstants;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;

-public class ConfigWikiSearch {
+public class Load_MediawikiWiki {
    
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
--- a/htroot/ConfigPHPBB3Search.html
+++ b/htroot/ConfigPHPBB3Search.html
--- a/htroot/ConfigPHPBB3Search.java
+++ b/htroot/ConfigPHPBB3Search.java
@ -30,7 +30,7 @@ import de.anomic.search.SwitchboardConstants;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;

-public class ConfigPHPBB3Search {
+public class Load_PHPBB3 {
    
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        // return variable that accumulates replacements
--- a/htroot/Load_RSS_p.html
+++ b/htroot/Load_RSS_p.html
@ -0,0 +1,90 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <title>YaCy '#[clientname]#': Loading of RSS Feeds</title>
+    #%env/templates/metas.template%#
+    <script type="text/javascript">
+    <!--
+    function setall(name, check){
+        var selectForm = document.forms.namedItem(name);
+        var count = selectForm.elements["num"].value;
+        if (check) for(i = 0; i < count; i++) {
+            if (selectForm.elements["item_" + i].checked) {
+                check = false;
+                break;
+            }
+        }
+        for(i = 0; i < count; i++){
+    	    selectForm.elements["item_" + i].checked = check;
+        }
+    }
+    -->
+    </script>
+    <script type="text/javascript" src="/js/sorttable.js"></script>
+  </head>
+  <body id="IndexCreate">
+    #%env/templates/header.template%#
+    #%env/templates/submenuIndexCreate.template%#
+    <h2>Loading of RSS Feeds</h2>
+    <p>
+      RSS feeds can be loaded into the YaCy search index.
+      This does not load the rss file as such into the index but all the messages inside the RSS feeds as individual documents.
+    </p>
+    
+    <form action="Load_RSS_p.html" method="get">
+       <fieldset>
+        <dl>
+          <dt><b>URL of the RSS feed</b></dt>
+          <dd><input type="text" name="url" value="#[url]#" size="60" maxlength="256"/></dd>
+          <dt>Simulation Mode</dt>
+          <dd><input type="submit" name="showrss" value="Show RSS Items" /></dd>
+          <dt>Indexing Mode</dt>
+          <dd>#(showload)#Available after successful loading of rss feed in simulation mode::
+          <!--<input type="submit" name="loadrss" value="Index RSS Items" />-->not yet implemented <b>THIS INTERFACE IS A STUB - DEVELOPMENT IS ONGOING</b>
+          #(/showload)#</dd>
+	      
+        </dl>
+      </fieldset>
+    </form>
+
+	#(showitems)#::
+	<form name="rssfeed"><fieldset>
+	  <legend><label for="table">RSS Feed</label></legend>
+      <dl>
+		<dt>Title</dt><dd>#[title]#</dd>
+		<dt>Author</dt><dd>#[author]#</dd>
+		<dt>Description</dt><dd>#[description]#</dd>
+		<dt>Language</dt><dd>#[language]#</dd>
+		<dt>Date</dt><dd>#[date]#</dd>
+		<dt>Time-to-live</dt><dd>#[ttl]#</dd>
+		<dt>Docs</dt><dd>#[docs]#</dd>
+	  </dl>
+      <table class="sortable" border="0" cellpadding="2" cellspacing="1">
+        <tr class="TableHeader" valign="bottom">
+          <td><input type="checkbox" name="allswitch" onclick="setall(this.form.name, this.value)" /></td>
+          <td>Title</td>
+          <td>URL</td>
+          <td>Author</td>
+          <td>Language</td>
+          <td>Date</td>
+          <td>Description</td>
+        </tr>
+        #{item}#
+        <tr class="TableCellLight">
+          <td align="left"><input type="checkbox" name="item_#[count]#" value="mark_#[hash]#" /></td>
+          <td><a href="#[link]#">#[title]#</a></td>
+          <td><a href="#[link]#">#[link]#</a></td>
+          <td>#[author]#</td>
+          <td>#[language]#</td>
+          <td>#[date]#</td>
+          <td>#[description]#</td>
+        </tr>
+        #{/item}#
+      </table>
+      <input type="hidden" name="num" value="#[num]#" />
+    </fieldset></form>
+	#(/showitems)#
+
+    #%env/templates/footer.template%#
+  </body>
+</html>
--- a/htroot/Load_RSS_p.java
+++ b/htroot/Load_RSS_p.java
@ -0,0 +1,114 @@
+/**
+ *  RSSLoader_p
+ *  Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
+ *  First released 20.08.2010 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *  
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.text.DateFormat;
+
+import net.yacy.cora.document.Hit;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
+import net.yacy.cora.document.RSSReader;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
+
+import de.anomic.crawler.CrawlProfile;
+import de.anomic.crawler.retrieval.Response;
+import de.anomic.http.server.RequestHeader;
+import de.anomic.search.Switchboard;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class Load_RSS_p {
+
+    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
+
+        final serverObjects prop = new serverObjects();
+        final Switchboard sb = (Switchboard)env;
+
+        prop.put("showitems", 0);
+        prop.put("showload", 0);
+        prop.put("url", "");
+        
+        if (post == null) return prop;
+
+        prop.put("url", post.get("url", ""));
+        
+        DigestURI url = null;
+        try {
+            url = post.containsKey("url") ? new DigestURI(post.get("url", ""), null) : null;
+        } catch (MalformedURLException e) {
+            Log.logException(e);
+        }
+        
+        // if we have an url then try to load the rss
+        RSSReader rss = null;
+        if (url != null) try {
+            prop.put("url", url.toNormalform(true, false));
+            Response entry = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+            byte[] resource = entry == null ? null : entry.getContent();
+            rss = resource == null ? null : RSSReader.parse(resource);
+        } catch (IOException e) {
+            Log.logException(e);
+        }
+
+        if (rss != null) {
+            prop.put("showitems", 1);
+            RSSFeed feed = rss.getFeed();
+            RSSMessage channel = feed.getChannel();
+            prop.putHTML("showitems_title", channel.getTitle());
+            String author = channel.getAuthor();
+            if (author == null || author.length() == 0) author = channel.getCopyright();
+            prop.putHTML("showitems_author", author == null ? "" : author);
+            prop.putHTML("showitems_description", channel.getDescription());
+            prop.putHTML("showitems_language", channel.getLanguage());
+            prop.putHTML("showitems_date", DateFormat.getDateTimeInstance().format(channel.getPubDate()));
+            prop.putHTML("showitems_ttl", channel.getTTL());
+            prop.putHTML("showitems_docs", channel.getDocs());
+            
+            int i = 0;
+            for (final Hit item: feed) {
+                try {
+                    url = new DigestURI(item.getLink(), null);
+                    author = item.getAuthor();
+                    if (author == null) author = item.getCopyright();
+                    prop.put("showitems_item_" + i + "_count", i);
+                    prop.putHTML("showitems_item_" + i + "_hash", new String(url.hash()));
+                    prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);
+                    prop.putHTML("showitems_item_" + i + "_title", item.getTitle());
+                    prop.putHTML("showitems_item_" + i + "_link", url.toNormalform(false, false));
+                    prop.putHTML("showitems_item_" + i + "_description", item.getDescription());
+                    prop.putHTML("showitems_item_" + i + "_language", item.getLanguage());
+                    prop.putHTML("showitems_item_" + i + "_date", DateFormat.getDateTimeInstance().format(item.getPubDate()));
+                    i++;
+                } catch (MalformedURLException e) {
+                    Log.logException(e);
+                    continue;
+                }
+            }
+            prop.put("showitems_item", i);
+            prop.put("showitems_num", i);
+            if (i > 0) prop.put("showload", 1);
+        }
+        
+        return prop;
+    }
+    
+}
--- a/htroot/RSSLoader_p.java
+++ b/htroot/RSSLoader_p.java
@ -1,97 +0,0 @@
-//ViewFile.java
-//-----------------------
-//part of YaCy
-//(C) by Michael Peter Christen; mc@yacy.net
-//first published on http://www.anomic.de
-//Frankfurt, Germany, 2004
-
-//last major change: 12.07.2004
-
-//This program is free software; you can redistribute it and/or modify
-//it under the terms of the GNU General Public License as published by
-//the Free Software Foundation; either version 2 of the License, or
-//(at your option) any later version.
-
-//This program is distributed in the hope that it will be useful,
-//but WITHOUT ANY WARRANTY; without even the implied warranty of
-//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//GNU General Public License for more details.
-
-//You should have received a copy of the GNU General Public License
-//along with this program; if not, write to the Free Software
-//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-//you must compile this file with
-//javac -classpath .:../Classes Status.java
-//if the shell's current path is HTROOT
-
-import java.io.IOException;
-import java.net.MalformedURLException;
-
-import net.yacy.cora.document.RSSReader;
-import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.kelondro.logging.Log;
-
-import de.anomic.crawler.CrawlProfile;
-import de.anomic.crawler.retrieval.Response;
-import de.anomic.http.server.RequestHeader;
-import de.anomic.search.Switchboard;
-import de.anomic.server.serverObjects;
-import de.anomic.server.serverSwitch;
-
-public class RSSLoader_p {
-
-    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
-
-        final serverObjects prop = new serverObjects();
-        final Switchboard sb = (Switchboard)env;
-        
-        if (post == null) {
-            return prop;
-        }
-        
-        DigestURI url = null;
-        
-        final String urlString = post.get("url", "");
-        if (urlString.length() > 0) try {
-            url = new DigestURI(urlString, null);
-        } catch (final MalformedURLException e) {
-            return prop;
-        }
-        
-        
-        // if the resource body was not cached we try to load it from web
-        Response entry = null;
-        try {
-            entry = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
-        } catch (final Exception e) {
-            return prop;
-        }
-        if (entry == null) return prop;
-
-        byte[] resource = entry.getContent();
-
-        if (resource == null) {
-            return prop;
-        }
-        
-        // now parse the content as rss
-        RSSReader rss;
-        try {
-            rss = RSSReader.parse(resource);
-        } catch (IOException e) {
-            Log.logException(e);
-            return prop;
-        }
-        
-        // get the links out of the rss
-        //Map<DigestURI, String> map = doc.getAnchors();
-        
-        // put the urls into crawler using the proxy profile
-        
-        
-        
-        return prop;
-    }
-    
-}
--- a/htroot/env/templates/submenuIndexCreate.template
+++ b/htroot/env/templates/submenuIndexCreate.template
@ -2,8 +2,9 @@
  <h3>Index Creation</h3>
  <ul class="SubMenu">
    <li><a href="/CrawlStart_p.html" class="MenuItemLink lock">Crawl Start (Advanced)</a></li>
-    <li><a href="/ConfigWikiSearch.html" class="MenuItemLink">Indexing of Media Wikis</a></li>
-    <li><a href="/ConfigPHPBB3Search.html" class="MenuItemLink">Indexing of phpBB3 Forums</a></li>
+    <li><a href="/Load_MediawikiWiki.html" class="MenuItemLink">Indexing of Media Wikis</a></li>
+    <li><a href="/Load_PHPBB3.html" class="MenuItemLink">Indexing of phpBB3 Forums</a></li>
+    <li><a href="/Load_RSS_p.html" class="MenuItemLink lock">Indexing of RSS Feeds</a></li>
    <li><a href="/ProxyIndexingMonitor_p.html" class="MenuItemLink lock">Scraping Proxy Configuration</a></li>
  </ul>
 </div>
--- a/source/net/yacy/cora/document/RSSMessage.java
+++ b/source/net/yacy/cora/document/RSSMessage.java
@ -39,7 +39,7 @@ public class RSSMessage implements Hit {
        title("title"),
        link("link"),
        description("description"),
-        pubDate("pubDate"),
+        pubDate("pubDate,lastBuildDate"),
        copyright("copyright,dc:publisher,publisher"),
        author("author,dc:creator,creator"),
        subject("subject,dc:subject"),
@ -47,6 +47,7 @@ public class RSSMessage implements Hit {
        referrer("referrer,referer"),
        language("language"),
        guid("guid"),
+        ttl("ttl"),
        docs("docs");
        
        private Set<String> keys;
@ -159,6 +160,10 @@ public class RSSMessage implements Hit {
        return Token.guid.valueFrom(this.map);
    }
    
+    public String getTTL() {
+        return Token.ttl.valueFrom(this.map);
+    }
+    
    public String getDocs() {
        return Token.docs.valueFrom(this.map);
    }
--- a/source/net/yacy/cora/document/RSSReader.java
+++ b/source/net/yacy/cora/document/RSSReader.java
@ -120,6 +120,11 @@ public class RSSReader extends DefaultHandler {
            item = new RSSMessage();
            parsingChannel = true;
        } else if ("item".equals(tag)) {
+            if (parsingChannel) {
+                // the channel ends with the first item not with the channel close tag
+                theChannel.setChannel(item);
+                parsingChannel = false;
+            }
            item = new RSSMessage();
            parsingItem = true;
        } else if ("image".equals(tag)) {
@ -132,7 +137,6 @@ public class RSSReader extends DefaultHandler {
        if (tag == null) return;
        if ("channel".equals(tag)) {
            parsingChannel = false;
-            theChannel.setChannel(item);
        } else if ("item".equals(tag)) {
            theChannel.addMessage(item);
            parsingItem = false;
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@ -45,6 +45,7 @@ import net.yacy.document.parser.ooxmlParser;
 import net.yacy.document.parser.pdfParser;
 import net.yacy.document.parser.pptParser;
 import net.yacy.document.parser.psParser;
+import net.yacy.document.parser.rssParser;
 import net.yacy.document.parser.rtfParser;
 import net.yacy.document.parser.sevenzipParser;
 import net.yacy.document.parser.swfParser;
@ -81,6 +82,7 @@ public final class TextParser {
        initParser(new pdfParser());
        initParser(new pptParser());
        initParser(new psParser());
+        initParser(new rssParser());
        initParser(new rtfParser());
        initParser(new sevenzipParser());
        initParser(new swfParser());
--- a/source/net/yacy/document/parser/rssParser.java
+++ b/source/net/yacy/document/parser/rssParser.java
@ -0,0 +1,102 @@
+/**
+ *  rssParser.java
+ *  Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
+ *  First released 20.08.2010 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *  
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.document.parser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSReader;
+import net.yacy.cora.document.Hit;
+import net.yacy.document.AbstractParser;
+import net.yacy.document.Document;
+import net.yacy.document.Parser;
+import net.yacy.document.TextParser;
+import net.yacy.document.parser.html.ImageEntry;
+
+/**
+ * Document parser for RSS feeds. Every item of a feed that carries a
+ * well-formed link becomes one {@link Document} of its own.
+ */
+public class rssParser extends AbstractParser implements Parser {
+
+    /**
+     * Creates the parser and registers the file extensions and mime types it handles.
+     */
+    public rssParser() {
+        super("RSS Parser");
+        SUPPORTED_EXTENSIONS.add("rss");
+        SUPPORTED_EXTENSIONS.add("xml");
+        // NOTE(review): "XML" does not look like a mime type - confirm that this entry is intended
+        SUPPORTED_MIME_TYPES.add("XML");
+        SUPPORTED_MIME_TYPES.add("text/rss");
+        SUPPORTED_MIME_TYPES.add("application/rss+xml");
+        SUPPORTED_MIME_TYPES.add("application/atom+xml");
+    }
+
+    /**
+     * Reads an RSS feed from the given stream and converts its items into documents.
+     *
+     * @param url      the location of the feed; used for the mime type lookup and for error reporting
+     * @param mimeType the mime type announced for the feed (not evaluated here)
+     * @param charset  the character set name that is handed on to every created document
+     * @param source   the stream to read the feed from
+     * @return one document per feed item with a well-formed link; an empty array if there is none
+     * @throws Failure if the RSS reader fails with an IOException on the stream
+     * @throws InterruptedException declared by the parser contract
+     */
+    public Document[] parse(MultiProtocolURI url, String mimeType, String charset, InputStream source) throws Failure, InterruptedException {
+        final RSSReader reader;
+        try {
+            reader = new RSSReader(source);
+        } catch (IOException e) {
+            throw new Parser.Failure("Load error:" + e.getMessage(), url);
+        }
+
+        //RSSMessage channel = feed.getChannel();
+        final RSSFeed items = reader.getFeed();
+        final List<Document> result = new ArrayList<Document>();
+        for (final Hit entry : items) {
+            try {
+                final MultiProtocolURI link = new MultiProtocolURI(entry.getLink());
+
+                // NOTE(review): the language is added as delivered by the item, without a null/empty check
+                final Set<String> entryLanguages = new HashSet<String>();
+                entryLanguages.add(entry.getLanguage());
+
+                // the only anchor of the document is the link of the item itself, labelled with the item title
+                final Map<MultiProtocolURI, String> entryAnchors = new HashMap<MultiProtocolURI, String>();
+                entryAnchors.put(link, entry.getTitle());
+
+                result.add(new Document(
+                        link,
+                        TextParser.mimeOf(url), // looked up for the feed url, not for the item link
+                        charset,
+                        entryLanguages,
+                        entry.getSubject(),
+                        entry.getTitle(),
+                        entry.getAuthor(),
+                        entry.getCopyright(),
+                        new String[0],
+                        entry.getDescription(),
+                        null,
+                        entryAnchors,
+                        new HashMap<MultiProtocolURI, ImageEntry>(),
+                        false));
+            } catch (MalformedURLException e) {
+                // items whose link is not a valid url are left out
+                continue;
+            }
+        }
+
+        return result.toArray(new Document[result.size()]);
+    }
+
+}