- added new protocol loader for 'file'-type URLs

- it is now possible to crawl the local file system with an intranet peer - redesign of URL handling - refactoring: created LGPLed package cora: 'content retrieval api' which may be used externally by other applications without yacy core elements because it has no dependencies to other parts of yacy git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6902 6c8d7289-2bf4-0310-a012-ef5d649a1542
2024-09-19 00:01:41 +02:00 · 2010-05-25 12:54:57 +00:00 · 2010-05-25 12:54:57 +00:00 · 11639aef35
commit 11639aef35
parent 2fd795207c
86 changed files with 2134 additions and 1676 deletions
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@ -685,7 +685,10 @@ crawler.http.maxFileSize=1048576
 crawler.ftp.maxFileSize=1048576

 # smb crawler specific settings: maximum size
-crawler.smb.maxFileSize=50000000
+crawler.smb.maxFileSize=100000000
+
+# file crawler specific settings: maximum size
+crawler.file.maxFileSize=100000000

 # maximum number of crawler threads
 crawler.MaxActiveThreads = 200
--- a/htroot/Collage.java
+++ b/htroot/Collage.java
@ -24,7 +24,7 @@

 import java.util.Random;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.kelondro.util.Domains;

 import de.anomic.crawler.ResultImages;
@ -90,8 +90,8 @@ public class Collage {
            final int yOffset = embed ? 0 : 70;
            for (int i = 0; i < fifoSize; i++) {
             
-                final DigestURI baseURL = origins[i].baseURL;
-                final DigestURI imageURL = origins[i].imageEntry.url();
+                final MultiProtocolURI baseURL = origins[i].baseURL;
+                final MultiProtocolURI imageURL = origins[i].imageEntry.url();
                
                // check if this loads a page from localhost, which must be prevented to protect the server
                // against attacks to the administration interface when localhost access is granted
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@ -36,6 +36,7 @@ import java.util.Set;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.TransformerWriter;
 import net.yacy.kelondro.data.meta.DigestURI;
@ -234,7 +235,7 @@ public class Crawler_p {
                            // stack url
                            sb.crawler.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it
                            final CrawlProfile.entry pe = sb.crawler.profilesActiveCrawls.newEntry(
-                                    crawlingStartURL.getHost(),
+                                    (crawlingStartURL.getHost() == null) ? Long.toHexString(System.currentTimeMillis()) : crawlingStartURL.getHost(),
                                    crawlingStartURL,
                                    newcrawlingMustMatch,
                                    newcrawlingMustNotMatch,
@ -345,7 +346,7 @@ public class Crawler_p {
                                writer.close();
                                
                                //String headline = scraper.getHeadline();
-                                final Map<DigestURI, String> hyperlinks = scraper.getAnchors();
+                                final Map<MultiProtocolURI, String> hyperlinks = scraper.getAnchors();
                                
                                // creating a crawler profile
                                final DigestURI crawlURL = new DigestURI("file://" + file.toString(), null);
@ -370,11 +371,12 @@ public class Crawler_p {
                                sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
                                
                                // loop through the contained links
-                                final Iterator<Map.Entry<DigestURI, String>> linkiterator = hyperlinks.entrySet().iterator();
+                                final Iterator<Map.Entry<MultiProtocolURI, String>> linkiterator = hyperlinks.entrySet().iterator();
                                DigestURI nexturl;
                                while (linkiterator.hasNext()) {
-                                    final Map.Entry<DigestURI, String> e = linkiterator.next();
-                                    nexturl = e.getKey();
+                                    final Map.Entry<MultiProtocolURI, String> e = linkiterator.next();
+                                    if (e.getKey() == null) continue;
+                                    nexturl = new DigestURI(e.getKey());
                                    if (nexturl == null) continue;
                                    
                                    // enqueuing the url for crawling
--- a/htroot/FeedReader_p.java
+++ b/htroot/FeedReader_p.java
@ -25,9 +25,9 @@
 import java.io.IOException;
 import java.net.MalformedURLException;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
-import net.yacy.document.parser.xml.RSSReader;
+import net.yacy.cora.document.Hit;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSReader;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;

@ -69,7 +69,7 @@ public class FeedReader_p {
                prop.putHTML("page_description", feed.getChannel().getDescription());
    
                int i = 0;
-                for (final RSSMessage item: feed) {
+                for (final Hit item: feed) {
                    prop.putHTML("page_items_" + i + "_author", item.getAuthor());
                    prop.putHTML("page_items_" + i + "_title", item.getTitle());
                    prop.putHTML("page_items_" + i + "_link", item.getLink());
--- a/htroot/SettingsAck_p.html
+++ b/htroot/SettingsAck_p.html
@ -159,23 +159,27 @@
        <td colspan="2"><strong>http Crawler Settings:</strong></td>
      </tr>
      <tr>
-        <td>Maximum Filesize:</td>
+        <td>Maximum HTTP Filesize:</td>
        <td class="settingsValue">#[crawler.http.maxFileSize]#</td>
      </tr>
      <tr>
        <td colspan="2"><strong>ftp Crawler Settings:</strong></td>
      </tr>
      <tr>
-        <td>Maximum Filesize:</td>
+        <td>Maximum FTP Filesize:</td>
        <td class="settingsValue">#[crawler.ftp.maxFileSize]#</td>
      </tr>
      <tr>
        <td colspan="2"><strong>smb Crawler Settings:</strong></td>
      </tr>
      <tr>
-        <td>Maximum Filesize:</td>
+        <td>Maximum SMB Filesize:</td>
        <td class="settingsValue">#[crawler.smb.maxFileSize]#</td>
      </tr>
+      <tr>
+        <td>Maximum file Filesize:</td>
+        <td class="settingsValue">#[crawler.file.maxFileSize]#</td>
+      </tr>
    </table>
    ::<!-- 29: Crawler settings timeout error -->
    <p class="error">Invalid crawler timeout value: <tt>#[crawler.clientTimeout]#</tt></p>
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@ -503,18 +503,32 @@ public class SettingsAck_p {
            long maxSmbSize;
            try {
                maxSmbSize = Integer.parseInt(maxSizeStr);
-                env.setConfig("crawler.smb.maxFileSize", Long.toString(maxFtpSize));
+                env.setConfig("crawler.smb.maxFileSize", Long.toString(maxSmbSize));
            } catch (final NumberFormatException e) {
                prop.put("info", "31");
                prop.putHTML("info_crawler.smb.maxFileSize",post.get("crawler.smb.maxFileSize"));
                return prop;
            }                        
            
+            maxSizeStr = post.get("crawler.file.maxFileSize");
+            if (maxSizeStr==null||maxSizeStr.length()==0) maxSizeStr = "-1";
+            
+            long maxFileSize;
+            try {
+                maxFileSize = Integer.parseInt(maxSizeStr);
+                env.setConfig("crawler.file.maxFileSize", Long.toString(maxFileSize));
+            } catch (final NumberFormatException e) {
+                prop.put("info", "31");
+                prop.putHTML("info_crawler.file.maxFileSize",post.get("crawler.file.maxFileSize"));
+                return prop;
+            }                        
+            
            // everything is ok
            prop.put("info_crawler.clientTimeout",(crawlerTimeout==0) ? "0" :DateFormatter.formatInterval(crawlerTimeout));
            prop.put("info_crawler.http.maxFileSize",(maxHttpSize==-1)? "-1":Formatter.bytesToString(maxHttpSize));
            prop.put("info_crawler.ftp.maxFileSize", (maxFtpSize==-1) ? "-1":Formatter.bytesToString(maxFtpSize));
-            prop.put("info_crawler.smb.maxFileSize", (maxFtpSize==-1) ? "-1":Formatter.bytesToString(maxSmbSize));
+            prop.put("info_crawler.smb.maxFileSize", (maxSmbSize==-1) ? "-1":Formatter.bytesToString(maxSmbSize));
+            prop.put("info_crawler.file.maxFileSize", (maxFileSize==-1) ? "-1":Formatter.bytesToString(maxFileSize));
            prop.put("info", "28");
            return prop;
        }
--- a/htroot/Settings_Crawler.inc
+++ b/htroot/Settings_Crawler.inc
@ -26,6 +26,22 @@
  </tr>
  <tr><td colspan="3"><hr /></td></tr>
  
+  <tr><td colspan="3"><p><strong>SMB Crawler Settings</strong>:</p></td></tr>
+  <tr valign="top">
+    <td>Maximum Filesize:</td>
+    <td><input name="crawler.smb.maxFileSize" type="text" size="16" maxlength="16" value="#[crawler.smb.maxFileSize]#" /></td>
+    <td><em>Maximum allowed file size in bytes that should be downloaded. Larger files will be skipped. <code>-1</code> means unlimited.</em></td>
+  </tr>
+  <tr><td colspan="3"><hr /></td></tr>
+  
+  <tr><td colspan="3"><p><strong>Local File Crawler Settings</strong>:</p></td></tr>
+  <tr valign="top">
+    <td>Maximum Filesize:</td>
+    <td><input name="crawler.file.maxFileSize" type="text" size="16" maxlength="16" value="#[crawler.file.maxFileSize]#" /></td>
+    <td><em>Maximum allowed file size in bytes that should be downloaded. Larger files will be skipped. <code>-1</code> means unlimited.</em></td>
+  </tr>
+  <tr><td colspan="3"><hr /></td></tr>
+  
  <tr valign="top">
    <td>&nbsp;</td>
    <td><input type="submit" name="crawlerSettings" value="Submit" /></td>
--- a/htroot/Settings_p.java
+++ b/htroot/Settings_p.java
@ -202,6 +202,7 @@ public final class Settings_p {
        prop.putHTML("crawler.http.maxFileSize",sb.getConfig("crawler.http.maxFileSize", "-1"));
        prop.putHTML("crawler.ftp.maxFileSize",sb.getConfig("crawler.ftp.maxFileSize", "-1"));
        prop.putHTML("crawler.smb.maxFileSize",sb.getConfig("crawler.smb.maxFileSize", "-1"));
+        prop.putHTML("crawler.file.maxFileSize",sb.getConfig("crawler.file.maxFileSize", "-1"));
        
        // return rewrite properties
        return prop;
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@ -35,6 +35,7 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
 import net.yacy.document.ParserException;
@ -372,7 +373,7 @@ public class ViewFile {
                i += putMediaInfo(prop, wordArray, i, document.getAudiolinks(), "audio", (i % 2 == 0));
                dark = (i % 2 == 0);
                
-                final HashMap<String, ImageEntry> ts = document.getImages();
+                final HashMap<MultiProtocolURI, ImageEntry> ts = document.getImages();
                final Iterator<ImageEntry> tsi = ts.values().iterator();
                ImageEntry entry;
                while (tsi.hasNext()) {
@ -439,9 +440,9 @@ public class ViewFile {
        return message;
    }
    
-    private static int putMediaInfo(final serverObjects prop, final String[] wordArray, int c, final Map<DigestURI, String> media, final String name, boolean dark) {
-        final Iterator<Map.Entry<DigestURI, String>> mi = media.entrySet().iterator();
-        Map.Entry<DigestURI, String> entry;
+    private static int putMediaInfo(final serverObjects prop, final String[] wordArray, int c, final Map<MultiProtocolURI, String> media, final String name, boolean dark) {
+        final Iterator<Map.Entry<MultiProtocolURI, String>> mi = media.entrySet().iterator();
+        Map.Entry<MultiProtocolURI, String> entry;
        int i = 0;
        while (mi.hasNext()) {
            entry = mi.next();
--- a/htroot/api/feed.java
+++ b/htroot/api/feed.java
@ -2,8 +2,8 @@

 import java.util.Date;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;

 import de.anomic.http.server.RequestHeader;
 import de.anomic.search.Switchboard;
--- a/htroot/rct_p.java
+++ b/htroot/rct_p.java
@ -30,8 +30,8 @@ import java.text.ParseException;
 import java.util.Date;
 import java.util.Iterator;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.Hit;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.DateFormatter;

@ -57,7 +57,7 @@ public class rct_p {
                final yacySeed seed = (peerhash == null) ? null : sb.peers.getConnected(peerhash);
                final RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(sb.peers, seed, 20, 60000);
                if (feed != null) {
-                    for (final RSSMessage item: feed) {
+                    for (final Hit item: feed) {
                        //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());
                        
                        // put url on remote crawl stack
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@ -35,8 +35,8 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.TreeSet;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.word.WordReference;
 import net.yacy.kelondro.data.word.WordReferenceRow;
--- a/htroot/yacy/transferRWI.java
+++ b/htroot/yacy/transferRWI.java
@ -30,8 +30,8 @@
 import java.util.ArrayList;
 import java.util.Iterator;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.word.WordReferenceRow;
 import net.yacy.kelondro.index.HandleSet;
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@ -29,8 +29,8 @@
 import java.io.IOException;
 import java.text.ParseException;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.DateFormatter;
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -32,11 +32,11 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.TreeSet;

+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
-import net.yacy.document.content.RSSMessage;
 import net.yacy.document.geolocalization.Location;
-import net.yacy.document.parser.xml.RSSFeed;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.word.Word;
--- a/htroot/yacysearch_location.java
+++ b/htroot/yacysearch_location.java
@ -22,7 +22,8 @@ import java.util.Set;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.TimeUnit;

-import net.yacy.document.content.RSSMessage;
+import net.yacy.cora.document.RSSMessage;
+import net.yacy.cora.services.Search;
 import net.yacy.document.geolocalization.Location;
 import de.anomic.data.LibraryProvider;
 import de.anomic.http.server.HeaderFramework;
@ -32,7 +33,6 @@ import de.anomic.search.SwitchboardConstants;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
-import de.anomic.yacy.yacyClient;

 import java.util.Date;
 import net.yacy.kelondro.util.DateFormatter;
@ -91,7 +91,8 @@ public class yacysearch_location {
            
            if (search_title || search_publisher || search_creator || search_subject) try {
                // get a queue of search results
-                BlockingQueue<RSSMessage> results = yacyClient.search(null, query, false, false, maximumTime, Integer.MAX_VALUE);
+                String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8080") + "/yacysearch.rss";
+                BlockingQueue<RSSMessage> results = Search.search(rssSearchServiceURL, query, false, false, maximumTime, Integer.MAX_VALUE);
                
                // take the results and compute some locations
                RSSMessage message;
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@ -36,8 +36,8 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.Hit;
+import net.yacy.cora.document.RSSFeed;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Base64Order;
@ -421,7 +421,7 @@ public class CrawlQueues {
        // parse the rss
        DigestURI url, referrer;
        Date loaddate;
-        for (final RSSMessage item: feed) {
+        for (final Hit item: feed) {
            //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());
            
            // put url on remote crawl stack
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@ -354,6 +354,7 @@ public final class CrawlStacker {
        // returns true if the url can be accepted accoring to network.unit.domain
        if (url == null) return "url is null";
        final String host = url.getHost();
+        if (this.acceptLocalURLs && host == null && url.getProtocol().equals("file")) return null;
        if (host == null) return "url.host is null";
        if (this.acceptGlobalURLs && this.acceptLocalURLs) return null; // fast shortcut to avoid dnsResolve
        // check if this is a local address and we are allowed to index local pages:
--- a/source/de/anomic/crawler/ResultImages.java
+++ b/source/de/anomic/crawler/ResultImages.java
@ -30,9 +30,9 @@ import java.util.HashMap;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Document;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;


 public class ResultImages {
@ -48,18 +48,17 @@ public class ResultImages {
    // we also check all links for a double-check so we don't get the same image more than once in any queue
    // image links may appear double here even if the pages where the image links are embedded already are checked for double-occurrence:
    // the same images may be linked from different pages
-    private static final ConcurrentHashMap<String, Long> doubleCheck = new ConcurrentHashMap<String, Long>(); // (url-hash, time) when the url appeared first
+    private static final ConcurrentHashMap<MultiProtocolURI, Long> doubleCheck = new ConcurrentHashMap<MultiProtocolURI, Long>(); // (url, time) when the url appeared first
    
    public static void registerImages(final Document document, final boolean privateEntry) {
        if (document == null) return;
        if (document.dc_source() == null) return;
        
-        final HashMap<String, ImageEntry> images = document.getImages();
+        final HashMap<MultiProtocolURI, ImageEntry> images = document.getImages();
        for (final ImageEntry image: images.values()) {
            // do a double-check; attention: this can be time-consuming since this possibly needs a DNS-lookup
-            String hashstring = new String(image.url().hash());
-            if (doubleCheck.containsKey(hashstring)) continue;
-            doubleCheck.put(hashstring, System.currentTimeMillis());
+            if (doubleCheck.containsKey(image.url())) continue;
+            doubleCheck.put(image.url(), System.currentTimeMillis());
            
            final String name = image.url().getFile();
            boolean good = false;
@ -144,8 +143,8 @@ public class ResultImages {
    
    public static class OriginEntry {
        public ImageEntry imageEntry;
-        public DigestURI baseURL;
-        public OriginEntry(final ImageEntry imageEntry, final DigestURI baseURL) {
+        public MultiProtocolURI baseURL;
+        public OriginEntry(final ImageEntry imageEntry, final MultiProtocolURI baseURL) {
            this.imageEntry = imageEntry;
            this.baseURL = baseURL;
        }
--- a/source/de/anomic/crawler/RobotsTxt.java
+++ b/source/de/anomic/crawler/RobotsTxt.java
@ -35,6 +35,7 @@ import java.util.Date;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.kelondro.blob.BEncodedHeap;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
@ -317,7 +318,7 @@ public class RobotsTxt {
        reqHeaders.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
        
        // adding referer
-        reqHeaders.put(RequestHeader.REFERER, (DigestURI.newURL(robotsURL,"/")).toNormalform(true, true));
+        reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));
        
        if (entry != null) {
            oldEtag = entry.getETag();
@ -380,7 +381,7 @@ public class RobotsTxt {
                    redirectionUrlString = redirectionUrlString.trim();
                    
                    // generating the new URL object
-                    final DigestURI redirectionUrl = DigestURI.newURL(robotsURL, redirectionUrlString);      
+                    final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(robotsURL, redirectionUrlString));      
                    
                    // following the redirection
                    if (log.isFinest()) log.logFinest("Redirection detected for robots.txt with URL '" + robotsURL + "'." + 
--- a/source/de/anomic/crawler/retrieval/FTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/FTPLoader.java
@ -32,6 +32,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Date;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
@ -272,8 +273,8 @@ public class FTPLoader {
     * @param entryUrl
     * @return
     */
-    private String getPath(final DigestURI entryUrl) {
-        return DigestURI.unescape(entryUrl.getPath()).replace("\"", "\"\"");
+    private String getPath(final MultiProtocolURI entryUrl) {
+        return MultiProtocolURI.unescape(entryUrl.getPath()).replace("\"", "\"\"");
    }

 }
--- a/source/de/anomic/crawler/retrieval/FileLoader.java
+++ b/source/de/anomic/crawler/retrieval/FileLoader.java
@ -0,0 +1,144 @@
+/**
+ *  FileLoader
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 25.5.2010 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package de.anomic.crawler.retrieval;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import de.anomic.http.server.HeaderFramework;
+import de.anomic.http.server.RequestHeader;
+import de.anomic.http.server.ResponseHeader;
+import de.anomic.net.ftpc;
+import de.anomic.search.Segments;
+import de.anomic.search.Switchboard;
+import de.anomic.data.MimeTable;
+
+import net.yacy.document.TextParser;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.util.DateFormatter;
+import net.yacy.kelondro.util.FileUtils;
+
+/**
+ * Loader for crawl requests whose URL uses the 'file' protocol.
+ * Directories are turned into an HTML listing; regular files are either loaded
+ * completely or, if no parser supports them or they exceed the configured size
+ * limit, represented only by their URL.
+ */
+public class FileLoader {
+
+    private final Switchboard sb;
+    private final Log log;
+    /** size limit in bytes, read from 'crawler.file.maxFileSize'; a negative value disables the size check */
+    private final long maxFileSize;
+
+    public FileLoader(final Switchboard sb, final Log log) {
+        this.sb = sb;
+        this.log = log;
+        // keep the limit as long: narrowing to int would wrap around for values above Integer.MAX_VALUE
+        this.maxFileSize = sb.getConfigLong("crawler.file.maxFileSize", -1L);
+    }
+
+    /**
+     * Loads the resource that the given request points to.
+     * All successful paths return a response with the status string "200":
+     * for a directory the body is an HTML page generated from the directory listing,
+     * for a file that is rejected (no parser or too big) the body is the normal form
+     * of the URL with content type text/plain, otherwise the body is the file content.
+     *
+     * @param request the crawl request; its URL must use the 'file' protocol
+     * @param acceptOnlyParseable if true, files for which TextParser.supports reports
+     *        an error are not read and only the metadata response is returned
+     * @return the response for the request
+     * @throws IOException if the URL does not use the 'file' protocol, if a directory
+     *         listing is not available or if reading the file fails
+     */
+    public Response load(final Request request, boolean acceptOnlyParseable) throws IOException {
+        DigestURI url = request.url();
+        if (!url.getProtocol().equals("file")) throw new IOException("wrong loader for FileLoader: " + url.getProtocol());
+
+        // hand over the referrer (if it can be resolved) in the request header
+        RequestHeader requestHeader = new RequestHeader();
+        if (request.referrerhash() != null) {
+            DigestURI ur = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
+            if (ur != null) requestHeader.put(RequestHeader.REFERER, ur.toNormalform(true, false));
+        }
+
+        // process directories: transform them to html with meta robots=noindex (using the ftpc lib)
+        if (url.isDirectory()) {
+            String[] l = url.list();
+            if (l == null) {
+                // this can only happen if there is no connection or the directory does not exist
+                log.logInfo("directory listing not available. URL = " + request.url().toString());
+                sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, "directory listing not available. URL = " + request.url().toString());
+                throw new IOException("directory listing not available. URL = " + request.url().toString());
+            }
+            // build the full URL of every directory entry; a separator is only added if the base does not end with one
+            String u = url.toNormalform(true, true);
+            List<String> list = new ArrayList<String>();
+            for (String s: l) {
+                list.add(u + ((u.endsWith("/") || u.endsWith("\\")) ? "" : "/") + s);
+            }
+
+            StringBuilder content = ftpc.dirhtml(u, null, null, null, list, true);
+
+            ResponseHeader responseHeader = new ResponseHeader();
+            responseHeader.put(HeaderFramework.LAST_MODIFIED, DateFormatter.formatRFC1123(new Date()));
+            responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
+            Response response = new Response(
+                    request, 
+                    requestHeader,
+                    responseHeader,
+                    "200",
+                    sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle()),
+                    content.toString().getBytes());
+
+            return response;
+        }
+
+        // create response header
+        String mime = MimeTable.ext2mime(url.getFileExtension());
+        ResponseHeader responseHeader = new ResponseHeader();
+        responseHeader.put(HeaderFramework.LAST_MODIFIED, DateFormatter.formatRFC1123(new Date(url.lastModified())));
+        responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
+
+        // check mime type and availability of parsers
+        // and also check resource size and limitation of the size
+        long size = url.length();
+        String parserError = null;
+        if ((acceptOnlyParseable && (parserError = TextParser.supports(url, mime)) != null) ||
+            (size > maxFileSize && maxFileSize >= 0)) {
+            // we know that we cannot process that file before loading
+            // only the metadata is returned
+
+            if (parserError != null) {
+                log.logInfo("No parser available in File crawler: '" + parserError + "' for URL " + request.url().toString() + ": parsing only metadata");
+            } else {
+                log.logInfo("Too big file in File crawler with size = " + size + " Bytes for URL " + request.url().toString() + ": parsing only metadata");
+            }
+
+            // create response with metadata only
+            responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/plain");
+            Response response = new Response(
+                    request, 
+                    requestHeader,
+                    responseHeader,
+                    "200",
+                    sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle()),
+                    url.toNormalform(true, true).getBytes());
+            return response;
+        }
+
+        // load the resource; the stream is closed in any case so that a failing read does not leak the handle
+        InputStream is = url.getInputStream();
+        byte[] b;
+        try {
+            b = FileUtils.read(is);
+        } finally {
+            is.close();
+        }
+
+        // create response with loaded content
+        Response response = new Response(
+                request, 
+                requestHeader,
+                responseHeader,
+                "200",
+                sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle()),
+                b);
+        return response;
+    }
+}
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@ -27,6 +27,7 @@ package de.anomic.crawler.retrieval;
 import java.io.IOException;
 import java.util.Date;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
@ -180,7 +181,7 @@ public final class HTTPLoader {
                    }
                    
                    // normalizing URL
-                    final DigestURI redirectionUrl = DigestURI.newURL(request.url(), redirectionUrlString);
+                    final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(request.url(), redirectionUrlString));

                    // restart crawling with new url
                    this.log.logInfo("CRAWLER Redirection detected ('" + res.getStatusLine() + "') for URL " + request.url().toString());
@ -289,7 +290,7 @@ public final class HTTPLoader {
                    }
                    
                    // normalizing URL
-                    final DigestURI redirectionUrl = DigestURI.newURL(request.url(), redirectionUrlString);
+                    final DigestURI redirectionUrl = new DigestURI(MultiProtocolURI.newURL(request.url(), redirectionUrlString));

                    
                    // if we are already doing a shutdown we don't need to retry crawling
--- a/source/de/anomic/data/BookmarkHelper.java
+++ b/source/de/anomic/data/BookmarkHelper.java
@ -52,6 +52,7 @@ import org.xml.sax.SAXException;

 import de.anomic.data.bookmarksDB.Bookmark;
 import de.anomic.data.bookmarksDB.Tag;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.TransformerWriter;
 import net.yacy.kelondro.data.meta.DigestURI;
@ -128,9 +129,9 @@ public class BookmarkHelper {
            
        int importCount = 0;
        
-        Map<DigestURI, String> links = new HashMap<DigestURI, String>();
+        Map<MultiProtocolURI, String> links = new HashMap<MultiProtocolURI, String>();
        String title;
-        DigestURI url;
+        MultiProtocolURI url;
        Bookmark bm;
        final Set<String> tags=listManager.string2set(tag); //this allow multiple default tags
        try {
@ -142,14 +143,14 @@ public class BookmarkHelper {
            writer.close();
            links = scraper.getAnchors();           
        } catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());}
-        for (Entry<DigestURI, String> link: links.entrySet()) {
-            url= link.getKey();
-            title=link.getValue();
+        for (Entry<MultiProtocolURI, String> link: links.entrySet()) {
+            url = link.getKey();
+            title = link.getValue();
            Log.logInfo("BOOKMARKS", "links.get(url)");
-            if(title.equals("")){//cannot be displayed
-                title=url.toString();
+            if (title.equals("")) {//cannot be displayed
+                title = url.toString();
            }
-            bm=db.new Bookmark(url.toString());
+            bm = db.new Bookmark(url.toString());
            bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
            bm.setTags(tags);
            bm.setPublic(importPublic);
--- a/source/de/anomic/data/MimeTable.java
+++ b/source/de/anomic/data/MimeTable.java
@ -5,7 +5,7 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.util.Properties;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;

 public class MimeTable {

@ -42,11 +42,11 @@ public class MimeTable {
        return mimeTable.getProperty(ext, dfltMime);
    }
    
-    public static String url2mime(final DigestURI url, final String dfltMime) {
+    public static String url2mime(final MultiProtocolURI url, final String dfltMime) {
        return ext2mime(url.getFileExtension(), dfltMime);
    }
    
-    public static String url2mime(final DigestURI url) {
+    public static String url2mime(final MultiProtocolURI url) {
        return ext2mime(url.getFileExtension());
    }
 }
--- a/source/de/anomic/net/ftpc.java
+++ b/source/de/anomic/net/ftpc.java
@ -2645,7 +2645,7 @@ public class ftpc {
        page.append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\n");
        page.append("<html><head>\n");
        page.append("  <title>" + title + "</title>\n");
-        page.append("  <meta name=\"generator\" content=\"YaCy ftpc dirlisting\">\n");
+        page.append("  <meta name=\"generator\" content=\"YaCy directory listing\">\n");
        if (metaRobotNoindex) {
            page.append("  <meta name=\"robots\" content=\"noindex\">\n");
        }
@ -2674,7 +2674,7 @@ public class ftpc {
                if (line.length() > nameEnd) {
                    page.append(line.substring(nameEnd));
                }
-            } else if (line.startsWith("http://") || line.startsWith("ftp://") || line.startsWith("smb://")) {
+            } else if (line.startsWith("http://") || line.startsWith("ftp://") || line.startsWith("smb://") || line.startsWith("file://")) {
                page.append("<a href=\"" + line + "\">" + line + "</a>");
            } else {
               // raw
--- a/source/de/anomic/search/DocumentIndex.java
+++ b/source/de/anomic/search/DocumentIndex.java
@ -146,7 +146,7 @@ public class DocumentIndex extends Segment {
     * If the given file is a path to a directory, the complete sub-tree is indexed
     * @param start
     */
-    public void addConcurrent(DigestURI start) {
+    public void addConcurrent(DigestURI start) throws IOException {
        assert (start != null);
        assert (start.canRead()) : start.toString();
        if (!start.isDirectory()) {
--- a/source/de/anomic/search/MediaSnippet.java
+++ b/source/de/anomic/search/MediaSnippet.java
@ -32,6 +32,7 @@ import java.util.TreeSet;

 import de.anomic.data.MimeTable;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Document;
 import net.yacy.document.parser.html.ImageEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
@ -130,25 +131,25 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
    public static ArrayList<MediaSnippet> computeMediaSnippets(final Document document, final HandleSet queryhashes, final ContentDomain mediatype) {
        
        if (document == null) return new ArrayList<MediaSnippet>();
-        Map<DigestURI, String> media = null;
+        Map<MultiProtocolURI, String> media = null;
        if (mediatype == ContentDomain.AUDIO) media = document.getAudiolinks();
        else if (mediatype == ContentDomain.VIDEO) media = document.getVideolinks();
        else if (mediatype == ContentDomain.APP) media = document.getApplinks();
        if (media == null) return null;
        
-        final Iterator<Map.Entry<DigestURI, String>> i = media.entrySet().iterator();
-        Map.Entry<DigestURI, String> entry;
+        final Iterator<Map.Entry<MultiProtocolURI, String>> i = media.entrySet().iterator();
+        Map.Entry<MultiProtocolURI, String> entry;
        DigestURI url;
        String desc;
        final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            entry = i.next();
-            url = entry.getKey();
+            url = new DigestURI(entry.getKey());
            desc = entry.getValue();
            int ranking = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
                           TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
            if (ranking < 2 * queryhashes.size()) {
-                result.add(new MediaSnippet(mediatype, url, MimeTable.url2mime(url), desc, document.getTextLength(), null, ranking, document.dc_source()));
+                result.add(new MediaSnippet(mediatype, url, MimeTable.url2mime(url), desc, document.getTextLength(), null, ranking, new DigestURI(document.dc_source())));
            }
        }
        return result;
@ -167,7 +168,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
        final ArrayList<MediaSnippet> result = new ArrayList<MediaSnippet>();
        while (i.hasNext()) {
            ientry = i.next();
-            url = ientry.url();
+            url = new DigestURI(ientry.url());
            String u = url.toString();
            if (u.indexOf(".ico") >= 0 || u.indexOf("favicon") >= 0) continue;
            if (ientry.height() > 0 && ientry.height() < 64) continue;
@ -177,7 +178,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
                           TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
                           TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
            final int ranking = Integer.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);  
-            result.add(new MediaSnippet(ContentDomain.IMAGE, url, MimeTable.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, document.dc_source()));
+            result.add(new MediaSnippet(ContentDomain.IMAGE, url, MimeTable.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, new DigestURI(document.dc_source())));
        }
        return result;
    }
--- a/source/de/anomic/search/MetadataRepository.java
+++ b/source/de/anomic/search/MetadataRepository.java
@ -38,6 +38,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.CharacterCoding;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -516,7 +517,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
                        if (format == 2) {
                            pw.println("<item>");
                            pw.println("<title>" + CharacterCoding.unicode2xml(metadata.dc_title(), true) + "</title>");
-                            pw.println("<link>" + DigestURI.escape(url) + "</link>");
+                            pw.println("<link>" + MultiProtocolURI.escape(url) + "</link>");
                            if (metadata.dc_creator().length() > 0) pw.println("<author>" + CharacterCoding.unicode2xml(metadata.dc_creator(), true) + "</author>");
                            if (metadata.dc_subject().length() > 0) pw.println("<description>" + CharacterCoding.unicode2xml(metadata.dc_subject(), true) + "</description>");
                            pw.println("<pubDate>" + entry.moddate().toString() + "</pubDate>");
--- a/source/de/anomic/search/RankingProcess.java
+++ b/source/de/anomic/search/RankingProcess.java
@ -39,6 +39,7 @@ import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -631,7 +632,7 @@ public final class RankingProcess extends Thread {
        // take out relevant information for reference computation
        if ((resultEntry.url() == null) || (resultEntry.title() == null)) return;
        //final String[] urlcomps = htmlFilterContentScraper.urlComps(resultEntry.url().toNormalform(true, true)); // word components of the url
-        final String[] descrcomps = DigestURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description
+        final String[] descrcomps = MultiProtocolURI.splitpattern.split(resultEntry.title().toLowerCase()); // words in the description
        
        // add references
        //addTopic(urlcomps);
--- a/source/de/anomic/search/ResultEntry.java
+++ b/source/de/anomic/search/ResultEntry.java
@ -31,6 +31,7 @@ import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Date;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -124,7 +125,7 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
        return (alternative_urlstring == null) ? urlcomps.url().toNormalform(false, true) : alternative_urlstring;
    }
    public String urlname() {
-        return (alternative_urlname == null) ? DigestURI.unescape(urlcomps.url().toNormalform(false, true)) : alternative_urlname;
+        return (alternative_urlname == null) ? MultiProtocolURI.unescape(urlcomps.url().toNormalform(false, true)) : alternative_urlname;
    }
    public String title() {
        return urlcomps.dc_title();
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@ -30,8 +30,8 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Map;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.index.HandleSet;
@ -370,8 +370,8 @@ public class ResultFetcher {
        
        // apply 'common-sense' heuristic using references
        final String urlstring = rentry.url().toNormalform(true, true);
-        final String[] urlcomps = DigestURI.urlComps(urlstring);
-        final String[] descrcomps = DigestURI.splitpattern.split(rentry.title().toLowerCase());
+        final String[] urlcomps = MultiProtocolURI.urlComps(urlstring);
+        final String[] descrcomps = MultiProtocolURI.splitpattern.split(rentry.title().toLowerCase());
        Navigator.Item tc;
        for (int j = 0; j < urlcomps.length; j++) {
            tc = topwords.get(urlcomps[j]);
--- a/source/de/anomic/search/Segment.java
+++ b/source/de/anomic/search/Segment.java
@ -37,6 +37,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
 import net.yacy.document.ParserException;
@ -198,7 +199,7 @@ public class Segment {
    private int addPageIndex(final DigestURI url, final Date urlModified, final Document document, final Condenser condenser, final String language, final char doctype, final int outlinksSame, final int outlinksOther) {
        int wordCount = 0;
        final int urlLength = url.toNormalform(true, true).length();
-        final int urlComps = DigestURI.urlComps(url.toString()).length;
+        final int urlComps = MultiProtocolURI.urlComps(url.toString()).length;
        
        // iterate over all words of context text
        final Iterator<Map.Entry<String, Word>> i = condenser.words().entrySet().iterator();
@ -273,10 +274,10 @@ public class Segment {
                    if (!u.contains("/" + language + "/") && !u.contains("/" + ISO639.country(language).toLowerCase() + "/")) {
                        // no confirmation using the url, use the TLD
                        language = url.language();
-                        System.out.println(error + ", corrected using the TLD");
+                        log.logWarning(error + ", corrected using the TLD");
                    } else {
                        // this is a strong hint that the statistics was in fact correct
-                        System.out.println(error + ", but the url proves that the statistic is correct");
+                        log.logWarning(error + ", but the url proves that the statistic is correct");
                    }
                }
            } else {
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@ -70,16 +70,17 @@ import java.util.zip.GZIPOutputStream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
 import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
 import net.yacy.document.content.DCEntry;
-import net.yacy.document.content.RSSMessage;
 import net.yacy.document.content.SurrogateReader;
 import net.yacy.document.importer.OAIListFriendsLoader;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.document.parser.xml.RSSFeed;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.meta.URIMetadataRow.Components;
@ -291,7 +292,7 @@ public final class Switchboard extends serverSwitch {
        // init sessionid name file
        final String sessionidNamesFile = getConfig("sessionidNamesFile","");
        this.log.logConfig("Loading sessionid file " + sessionidNamesFile);
-        DigestURI.initSessionIDNames(new File(getRootPath(), sessionidNamesFile));
+        MultiProtocolURI.initSessionIDNames(FileUtils.loadList(new File(getRootPath(), sessionidNamesFile)));

        // init tables
        this.tables = new WorkTables(this.workPath);
@ -1733,7 +1734,7 @@ public final class Switchboard extends serverSwitch {
                ((response.profile() == null) || (response.depth() < response.profile().depth()))
        ) {
            // get the hyperlinks
-            final Map<DigestURI, String> hl = document.getHyperlinks();
+            final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
            
            // add all images also to the crawl stack
            for (ImageEntry imageReference : document.getImages().values()) {
@ -1741,15 +1742,15 @@ public final class Switchboard extends serverSwitch {
            }
            
            // insert those hyperlinks to the crawler
-            DigestURI nextUrl;
-            for (Map.Entry<DigestURI, String> nextEntry : hl.entrySet()) {
+            MultiProtocolURI nextUrl;
+            for (Map.Entry<MultiProtocolURI, String> nextEntry : hl.entrySet()) {
                // check for interruption
                checkInterruption();
                
                // process the next hyperlink
                nextUrl = nextEntry.getKey();
                String u = nextUrl.toNormalform(true, true, true);
-                if (!(u.startsWith("http") || u.startsWith("ftp") || u.startsWith("smb"))) continue;
+                if (!(u.startsWith("http://") || u.startsWith("https://") || u.startsWith("ftp://") || u.startsWith("smb://") || u.startsWith("file://"))) continue; // https is listed explicitly: the former "http" prefix test also let https links through
                // enqueue the hyperlink into the pre-notice-url db
                try {
                    crawlStacker.enqueueEntry(new Request(
--- a/source/de/anomic/search/TextSnippet.java
+++ b/source/de/anomic/search/TextSnippet.java
@ -405,7 +405,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
        /* ===========================================================================
         * COMPUTE SNIPPET
         * =========================================================================== */    
-        final DigestURI resFavicon = document.getFavicon();
+        final DigestURI resFavicon = (document.getFavicon() == null) ? null : new DigestURI(document.getFavicon());
        if (resFavicon != null) faviconCache.put(new String(url.hash()), resFavicon);
        // we have found a parseable non-empty file: use the lines

--- a/source/de/anomic/server/serverObjects.java
+++ b/source/de/anomic/server/serverObjects.java
@ -52,8 +52,8 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.CharacterCoding;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.DateFormatter;
 import net.yacy.kelondro.util.Formatter;
@ -369,9 +369,9 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
        if (this.size() == 0) return "";
        StringBuilder param = new StringBuilder();
        for (Map.Entry<String, String> entry: this.entrySet()) {
-            param.append(DigestURI.escape(entry.getKey()));
+            param.append(MultiProtocolURI.escape(entry.getKey()));
            param.append('=');
-            param.append(DigestURI.escape(entry.getValue()));
+            param.append(MultiProtocolURI.escape(entry.getValue()));
            param.append('&');
        }
        param.setLength(param.length() - 1);
--- a/source/de/anomic/yacy/graphics/WebStructureGraph.java
+++ b/source/de/anomic/yacy/graphics/WebStructureGraph.java
@ -37,6 +37,7 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
 import net.yacy.kelondro.data.meta.DigestURI;
@ -95,11 +96,11 @@ public class WebStructureGraph {
    }
    
    public Integer[] /*(outlinksSame, outlinksOther)*/ generateCitationReference(final Document document, final Condenser condenser, final Date docDate) {
-        final DigestURI url = document.dc_source();
+        final DigestURI url = new DigestURI(document.dc_source());
        
        // generate citation reference
-        final Map<DigestURI, String> hl = document.getHyperlinks();
-        final Iterator<DigestURI> it = hl.keySet().iterator();
+        final Map<MultiProtocolURI, String> hl = document.getHyperlinks();
+        final Iterator<MultiProtocolURI> it = hl.keySet().iterator();
        byte[] nexturlhashb;
        String nexturlhash;
        final StringBuilder cpg = new StringBuilder(12 * (hl.size() + 1) + 1);
@ -109,7 +110,7 @@ public class WebStructureGraph {
        int GCount = 0;
        int LCount = 0;
        while (it.hasNext()) {
-            nexturlhashb = it.next().hash();
+            nexturlhashb = new DigestURI(it.next()).hash();
            if (nexturlhashb != null) {
                nexturlhash = new String(nexturlhashb);
                assert nexturlhash.length() == 12 : "nexturlhash.length() = " + nexturlhash.length() + ", nexturlhash = " + nexturlhash;
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@ -54,14 +54,12 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.LinkedBlockingQueue;
 import java.util.regex.Pattern;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
-import net.yacy.document.parser.xml.RSSReader;
-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSReader;
+import net.yacy.cora.protocol.HttpConnector;
+import net.yacy.cora.services.Search;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.data.word.WordReference;
@ -86,10 +84,8 @@ import de.anomic.crawler.retrieval.HTTPLoader;
 import de.anomic.http.client.DefaultCharsetFilePart;
 import de.anomic.http.client.DefaultCharsetStringPart;
 import de.anomic.http.client.Client;
-import de.anomic.http.client.RemoteProxyConfig;
 import de.anomic.http.server.HeaderFramework;
 import de.anomic.http.server.RequestHeader;
-import de.anomic.http.server.ResponseContainer;
 import de.anomic.search.RankingProfile;
 import de.anomic.search.RankingProcess;
 import de.anomic.search.Segment;
@ -101,6 +97,22 @@ import de.anomic.tools.crypt;

 public final class yacyClient {

+
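+    /**
+     * Posts the given parts to the URL "http://" + cluster address of the target + "/yacy/" + filename via {@link HttpConnector#wput}, using the given timeout.
+     * @param target peer to contact; supplies the cluster address for the URL and the hex hash for the ".yacyh" name passed as second wput argument
+     * @param filename name of the file below "/yacy/" on the target peer
+     * @param post parts that are handed to wput as the request data
+     * @return the byte array returned by HttpConnector.wput
+     * @throws IOException if the wput call fails with one
+     */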
+    private static byte[] postToFile(final yacySeed target, final String filename, final List<Part> post, final int timeout) throws IOException {
+        return HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", post, timeout, false);
+    }
+    private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final List<Part> post, final int timeout) throws IOException {
+        return HttpConnector.wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", post, timeout, false);
+    }
+    
    /**
     * this is called to enrich the seed information by
     * - own address (if peer is behind a nat/router)
@ -134,7 +146,7 @@ public final class yacyClient {
            post.add(new DefaultCharsetStringPart("seed", mySeed.genSeedStr(salt)));
            // send request
            final long start = System.currentTimeMillis();
-            final byte[] content = wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", post, 30000, false);
+            final byte[] content = HttpConnector.wput("http://" + address + "/yacy/hello.html", yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", post, 30000, false);
            yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
            result = FileUtils.table(content);
            break;
@ -237,82 +249,6 @@ public final class yacyClient {
        return count;
    }

-    /**
-     * send data to the server named by vhost
-     * 
-     * @param address address of the server
-     * @param vhost name of the server at address which should respond
-     * @param post data to send (name-value-pairs)
-     * @param gzipBody send with content gzip encoded
-     * @return response body
-     * @throws IOException
-     */
-    /*
-    private static byte[] wput(final String url, String vhost, final List<Part> post, boolean gzipBody) throws IOException {
-        return wput(url, vhost, post, 10000, gzipBody);
-    }
-    */
-    /**
-     * send data to the server named by vhost
-     * 
-     * @param address address of the server
-     * @param vhost name of the server at address which should respond
-     * @param post data to send (name-value-pairs)
-     * @param timeout in milliseconds
-     * @return response body
-     * @throws IOException
-     */
-    private static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout) throws IOException {
-        return wput(url, vhost, post, timeout, false);
-    }
-    /**
-     * send data to the server named by vhost
-     * 
-     * @param address address of the server
-     * @param vhost name of the server at address which should respond
-     * @param post data to send (name-value-pairs)
-     * @param timeout in milliseconds
-     * @param gzipBody send with content gzip encoded
-     * @return response body
-     * @throws IOException
-     */
-    private static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout, final boolean gzipBody) throws IOException {
-        final RequestHeader header = new RequestHeader();
-        header.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
-        header.put(HeaderFramework.HOST, vhost);
-        final Client client = new Client(timeout, header);
-        client.setProxy(proxyConfig());
-        
-        ResponseContainer res = null;
-        byte[] content = null;
-        try {
-            // send request/data
-            res = client.POST(url, post, gzipBody);
-            content = res.getData();
-        } finally {
-            if(res != null) {
-                // release connection
-                res.closeStream();
-            }
-        }
-        return content;
-    }
-
-    /**
-     * @see wput
-     * @param target
-     * @param filename
-     * @param post
-     * @return
-     * @throws IOException
-     */
-    private static byte[] postToFile(final yacySeed target, final String filename, final List<Part> post, final int timeout) throws IOException {
-        return wput("http://" + target.getClusterAddress() + "/yacy/" + filename, target.getHexHash() + ".yacyh", post, timeout, false);
-    }
-    private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final List<Part> post, final int timeout) throws IOException {
-        return wput("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", post, timeout, false);
-    }
-
    public static yacySeed querySeed(final yacySeed target, final String seedHash) {
        // prepare request
        final String salt = crypt.randomSalt();
@ -400,7 +336,7 @@ public final class yacyClient {
        // send request
        try {
            /* a long time-out is needed */
-            final byte[] result = wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, (int) maxTime); 
+            final byte[] result = HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, (int) maxTime); 
            final RSSReader reader = RSSReader.parse(result);
            if (reader == null) {
                yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
@ -425,120 +361,11 @@ public final class yacyClient {
            return null;
        }
    }
-
-
-    public static BlockingQueue<RSSMessage> search(String urlBase, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
-        if (urlBase == null) {
-            urlBase = "http://localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") + "/yacysearch.rss";
-        }
-        BlockingQueue<RSSMessage> queue = new LinkedBlockingQueue<RSSMessage>();
-        searchJob job = new searchJob(urlBase, query, verify, global, timeout, maximumRecords, queue);
-        job.start();
-        return queue;
-    }
    
-    private final static int recordsPerSession = 10;
-    
-    public static class searchJob extends Thread {
-
-        String urlBase, query;
-        boolean verify, global;
-        long timeout;
-        int startRecord,  maximumRecords;
-        BlockingQueue<RSSMessage> queue;
-
-        public searchJob(String urlBase, String query, boolean verify, boolean global, long timeout, int maximumRecords, BlockingQueue<RSSMessage> queue) {
-            this.urlBase = urlBase;
-            this.query = query;
-            this.verify = verify;
-            this.global = global;
-            this.timeout = timeout;
-            this.startRecord = 0;
-            this.maximumRecords = maximumRecords;
-            this.queue = queue;
-        }
-
-        public void run() {
-            RSSMessage message;
-            mainloop: while (timeout > 0 && maximumRecords > 0) {
-                long st = System.currentTimeMillis();
-                RSSFeed feed = search(urlBase, query, verify, global, timeout, startRecord, recordsPerSession);
-                if (feed == null || feed.isEmpty()) break mainloop;
-                maximumRecords -= feed.size();
-                innerloop: while (!feed.isEmpty()) {
-                    message = feed.pollMessage();
-                    if (message == null) break innerloop;
-                    try {
-                        queue.put(message);
-                    } catch (InterruptedException e) {
-                        break innerloop;
-                    }
-                }
-                startRecord += recordsPerSession;
-                timeout -= System.currentTimeMillis() - st;
-            }
-            try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {}
-        }
-    }
-    
-    /**
-     * send a query to a yacy public search interface
-     * @param urlBase the target url base (everything before the ? that follows the SRU request syntax properties). can null, then the local peer is used
-     * @param query the query as string
-     * @param startRecord number of first record
-     * @param maximumRecords maximum number of records
-     * @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
-     * @param global if true also search results from other peers are included
-     * @param timeout milliseconds that are waited at maximum for a search result
-     * @return
-     */
-    public static RSSFeed search(String urlBase, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) {
-        // returns a search result from a peer
-        if (urlBase == null) {
-            urlBase = "http://localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") + "/yacysearch.rss";
-        }
-        DigestURI uri = null;
-        try {
-            uri = new DigestURI(urlBase, null);
-        } catch (MalformedURLException e) {
-            yacyCore.log.logWarning("yacyClient.search failed asking peer '" + urlBase + "': bad url, " + e.getMessage());
-            return null;
-        }
-        
-        // prepare request
-        final List<Part> post = new ArrayList<Part>();
-        post.add(new DefaultCharsetStringPart("query", query));
-        post.add(new DefaultCharsetStringPart("startRecord", Integer.toString(startRecord)));
-        post.add(new DefaultCharsetStringPart("maximumRecords", Long.toString(maximumRecords)));
-        post.add(new DefaultCharsetStringPart("verify", verify ? "true" : "false"));
-        post.add(new DefaultCharsetStringPart("resource", global ? "global" : "local"));
-        
-        // send request
-        try {
-            final byte[] result = wput(urlBase, uri.getHost(), post, (int) timeout);
-            //String debug = new String(result); System.out.println("*** DEBUG: " + debug);
-            final RSSReader reader = RSSReader.parse(result);
-            if (reader == null) {
-                yacyCore.log.logWarning("yacyClient.search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
-                return null;
-            }
-            final RSSFeed feed = reader.getFeed();
-            if (feed == null) {
-                // case where the rss reader does not understand the content
-                yacyCore.log.logWarning("yacyClient.search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (2)");
-                return null;
-            }
-            return feed;
-        } catch (final IOException e) {
-            yacyCore.log.logSevere("yacyClient.search error asking peer '" + uri.getHost() + "':" + e.toString());
-            return null;
-        }
-    }
-    
-    public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) {
+    public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException {
        String address = (targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") : targetSeed.getClusterAddress();
        String urlBase = "http://" + address + "/yacysearch.rss";
-        return search(urlBase, query, verify, global, timeout, startRecord, maximumRecords);
+        return Search.search(urlBase, query, verify, global, timeout, startRecord, maximumRecords);
    }
    
    @SuppressWarnings("unchecked")
@ -607,7 +434,7 @@ public final class yacyClient {
        // send request
        HashMap<String, String> result = null;
        try {
-          	result = FileUtils.table(wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000));
+          	result = FileUtils.table(HttpConnector.wput("http://" + target.getClusterAddress() + "/yacy/search.html", target.getHexHash() + ".yacyh", post, 60000));
        } catch (final IOException e) {
            yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore);
            //yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
@ -878,7 +705,7 @@ public final class yacyClient {
        
        // send request
        try {
-            final byte[] content = wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 10000);
+            final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 10000);
            final HashMap<String, String> result = FileUtils.table(content);
            return result;
        } catch (final Exception e) {
@ -902,7 +729,7 @@ public final class yacyClient {
        
        // send request
        try {
-            final byte[] content = wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 20000);
+            final byte[] content = HttpConnector.wput("http://" + targetAddress + "/yacy/transfer.html", targetAddress, post, 20000);
            final HashMap<String, String> result = FileUtils.table(content);
            return result;
        } catch (final Exception e) {
@ -977,7 +804,7 @@ public final class yacyClient {
            
        // send request
        try {
-            final byte[] content = wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", post, 10000);
+            final byte[] content = HttpConnector.wput("http://" + address + "/yacy/crawlReceipt.html", target.getHexHash() + ".yacyh", post, 10000);
            return FileUtils.table(content);
        } catch (final Exception e) {
            // most probably a network time-out exception
@ -1127,7 +954,7 @@ public final class yacyClient {
        post.add(new DefaultCharsetStringPart("entryc", Integer.toString(indexcount)));
        post.add(new DefaultCharsetStringPart("indexes", entrypost.toString()));  
        try {
-            final byte[] content = wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
+            final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferRWI.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
            final Iterator<String> v = FileUtils.strings(content);
            // this should return a list of urlhashes that are unknown
            
@ -1171,7 +998,7 @@ public final class yacyClient {
        }
        post.add(new DefaultCharsetStringPart("urlc", Integer.toString(urlc)));
        try {
-            final byte[] content = wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
+            final byte[] content = HttpConnector.wput("http://" + address + "/yacy/transferURL.html", targetSeed.getHexHash() + ".yacyh", post, timeout, gzipBody);
            final Iterator<String> v = FileUtils.strings(content);
            
            final HashMap<String, String> result = FileUtils.table(v);
@ -1193,7 +1020,7 @@ public final class yacyClient {
        String address = targetSeed.getClusterAddress();
        if (address == null) { address = "localhost:8080"; }
        try {
-            final byte[] content = wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", post, 5000);
+            final byte[] content = HttpConnector.wput("http://" + address + "/yacy/profile.html", targetSeed.getHexHash() + ".yacyh", post, 5000);
            return FileUtils.table(content);
        } catch (final Exception e) {
            yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
@ -1201,14 +1028,6 @@ public final class yacyClient {
        }
    }
    
-    /**
-     * proxy for "to YaCy connections"
-     * @return
-     */
-    private static final RemoteProxyConfig proxyConfig() {
-        final RemoteProxyConfig p = RemoteProxyConfig.getRemoteProxyConfig();
-        return ((p != null) && (p.useProxy()) && (p.useProxy4Yacy())) ? p : null;
-    }

    public static void main(final String[] args) {
        if(args.length > 1) {
@ -1262,7 +1081,7 @@ public final class yacyClient {
            //post.add(new FilePart("filename", new ByteArrayPartSource(filename, file)));
            // do it!
            try {
-                final byte[] response = wput(url.toString(), vhost, post, timeout, gzipBody);
+                final byte[] response = HttpConnector.wput(url.toString(), vhost, post, timeout, gzipBody);
                System.out.println(new String(response));
            } catch (final IOException e) {
                Log.logException(e);
--- a/source/de/anomic/yacy/yacyCore.java
+++ b/source/de/anomic/yacy/yacyCore.java
@ -48,8 +48,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Semaphore;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.DateFormatter;
--- a/source/de/anomic/yacy/yacyPeerActions.java
+++ b/source/de/anomic/yacy/yacyPeerActions.java
@ -26,8 +26,8 @@ package de.anomic.yacy;

 import java.util.HashMap;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.document.parser.xml.RSSFeed;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.DateFormatter;
 import net.yacy.kelondro.util.MapTools;
--- a/source/de/anomic/yacy/yacyRelease.java
+++ b/source/de/anomic/yacy/yacyRelease.java
@ -45,8 +45,8 @@ import java.util.Map;
 import java.util.SortedSet;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.ContentScraper;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Base64Order;
@ -74,17 +74,17 @@ public final class yacyRelease extends yacyVersion {
    private static Map<yacyUpdateLocation, DevAndMainVersions> latestReleases = new HashMap<yacyUpdateLocation, DevAndMainVersions>();
    public final static List<yacyUpdateLocation> latestReleaseLocations = new ArrayList<yacyUpdateLocation>(); // will be initialized with value in defaults/yacy.network.freeworld.unit
    
-    private DigestURI url;
+    private MultiProtocolURI url;
    private File releaseFile;
    
    private PublicKey publicKey;
    
-    public yacyRelease(final DigestURI url) {
+    public yacyRelease(final MultiProtocolURI url) {
        super(url.getFileName());
        this.url = url;
    }
    
-    public yacyRelease(final DigestURI url, PublicKey publicKey) {
+    public yacyRelease(final MultiProtocolURI url, PublicKey publicKey) {
        this(url);
        this.publicKey = publicKey;
    }
@ -94,7 +94,7 @@ public final class yacyRelease extends yacyVersion {
        this.releaseFile = releaseFile;
    }

-    public DigestURI getUrl() {
+    public MultiProtocolURI getUrl() {
        return url;
    }
    
@ -241,10 +241,10 @@ public final class yacyRelease extends yacyVersion {
        }
        
        // analyse links in scraper resource, and find link to latest release in it
-        final Map<DigestURI, String> anchors = scraper.getAnchors(); // a url (String) / name (String) relation
+        final Map<MultiProtocolURI, String> anchors = scraper.getAnchors(); // a url (String) / name (String) relation
        final TreeSet<yacyRelease> mainReleases = new TreeSet<yacyRelease>();
        final TreeSet<yacyRelease> devReleases = new TreeSet<yacyRelease>();
-        for(DigestURI url : anchors.keySet()) {
+        for (MultiProtocolURI url : anchors.keySet()) {
            try {
                yacyRelease release = new yacyRelease(url, location.getPublicKey());
                //System.out.println("r " + release.toAnchor());
--- a/source/net/yacy/cora/document/Channel.java
+++ b/source/net/yacy/cora/document/Channel.java
@ -0,0 +1,42 @@
+/**
+ *  Channel
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 10.5.2010 at http://yacy.net
+ *  
+ *  This file is part of YaCy Content Integration
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.document;
+
+/**
+ * An iterable collection of {@link Hit} entries with String-valued, write-only attributes.
+ */
+public interface Channel extends Iterable<Hit> {
+
+    // descriptive attributes of the channel
+    void setTitle(String title);
+    void setLink(String link);
+    void setDescription(String description);
+    void setImageURL(String imageUrl);
+
+    // attributes describing the result set
+    // NOTE(review): the names resemble OpenSearch response elements - confirm
+    void setTotalResults(String totalResults);
+    void setStartIndex(String startIndex);
+    void setItemsPerPage(String itemsPerPage);
+    void setSearchTerms(String searchTerms);
+}
--- a/source/net/yacy/cora/document/Channels.java
+++ b/source/net/yacy/cora/document/Channels.java
@ -0,0 +1,27 @@
+/**
+ *  Channels
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 10.5.2010 at http://yacy.net
+ *  
+ *  This file is part of YaCy Content Integration
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.document;
+
+public class Channels {
+    // NOTE(review): this class currently declares no members.
+}
--- a/source/net/yacy/cora/document/Hit.java
+++ b/source/net/yacy/cora/document/Hit.java
@ -0,0 +1,74 @@
+/**
+ *  Hit
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 10.5.2010 at http://yacy.net
+ *  
+ *  This file is part of YaCy Content Integration
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.document;
+
+public interface Hit {
+    // Setters: every property value is passed as a String.
+    public void setAuthor(String author);
+    
+    public void setCopyright(String copyright);
+    
+    public void setCategory(String category);
+    
+    public void setTitle(String title);
+    
+    public void setLink(String link);
+    
+    public void setReferrer(String referrer);
+    
+    public void setLanguage(String language);
+    
+    public void setDescription(String description);
+    
+    public void setCreator(String creator);
+    
+    public void setPubDate(String pubdate);
+    
+    public void setGuid(String guid);
+    
+    public void setDocs(String docs);
+
+    // Getters. NOTE(review): setCreator has no getCreator counterpart here - confirm this is intended.
+    public String getAuthor();
+    
+    public String getCopyright();
+    
+    public String getCategory();
+    
+    public String getTitle();
+    
+    public String getLink();
+    
+    public String getReferrer();
+    
+    public String getLanguage();
+    
+    public String getDescription();
+    
+    public String getPubDate();
+    
+    public String getGuid();
+    
+    public String getDocs();
+
+}
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
--- a/source/net/yacy/cora/document/Punycode.java
+++ b/source/net/yacy/cora/document/Punycode.java
@ -21,19 +21,19 @@
 * USA
 */

-package net.yacy.kelondro.util;
+package net.yacy.cora.document;


 public class Punycode {
  /* Punycode parameters */
-  final static int TMIN = 1;
-  final static int TMAX = 26;
-  final static int BASE = 36;
-  final static int INITIAL_N = 128;
-  final static int INITIAL_BIAS = 72;
-  final static int DAMP = 700;
-  final static int SKEW = 38;
-  final static char DELIMITER = '-';
+  private final static int TMIN = 1;
+  private final static int TMAX = 26;
+  private final static int BASE = 36;
+  private final static int INITIAL_N = 128;
+  private final static int INITIAL_BIAS = 72;
+  private final static int DAMP = 700;
+  private final static int SKEW = 38;
+  private final static char DELIMITER = '-';

  /**
   * Punycodes a unicode string.
--- a/source/net/yacy/document/parser/xml/RSSFeed.java
+++ b/source/net/yacy/document/parser/xml/RSSFeed.java
@ -1,40 +1,31 @@
-// RSSFeed.java
-// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 24.04.2008 on http://yacy.net
-//
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-// 
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+/**
+ *  RSSFeed
+ *  Copyright 2007 by Michael Peter Christen
+ *  First released 16.7.2007 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */

-package net.yacy.document.parser.xml;
+package net.yacy.cora.document;

 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentLinkedQueue;

-import net.yacy.document.content.RSSMessage;
-
-
-public class RSSFeed implements Iterable<RSSMessage> {
+public class RSSFeed implements Iterable<Hit> {

    // static channel names of feeds
    public static final String TEST           = "TEST";
@ -119,7 +110,7 @@ public class RSSFeed implements Iterable<RSSMessage> {
        return messages.size();
    }
    
-    public Iterator<RSSMessage> iterator() {
+    public Iterator<Hit> iterator() {
        return new messageIterator();
    }
    
@ -131,7 +122,7 @@ public class RSSFeed implements Iterable<RSSMessage> {
        return messages.remove(nextGUID);
    }

-    public class messageIterator implements Iterator<RSSMessage>{
+    public class messageIterator implements Iterator<Hit>{
        
        Iterator<String> GUIDiterator;
        String lastGUID;
--- a/source/net/yacy/document/content/RSSMessage.java
+++ b/source/net/yacy/document/content/RSSMessage.java
@ -1,31 +1,24 @@
-// RSSMessage.java
-// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 16.07.2007 on http://yacy.net
-//
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
-// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
-// $LastChangedRevision: 1986 $
-// $LastChangedBy: orbiter $
-//
-// LICENSE
-// 
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+/**
+ *  RSSMessage
+ *  Copyright 2007 by Michael Peter Christen
+ *  First released 16.7.2007 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */

-
-package net.yacy.document.content;
+package net.yacy.cora.document;

 import java.util.Date;
 import java.util.HashMap;
@ -33,7 +26,7 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;

-public class RSSMessage {
+public class RSSMessage implements Hit {

    // statics for item generation and automatic categorization
    private static int guidcount = 0;
@ -165,4 +158,74 @@ public class RSSMessage {
    public String toString() {
        return this.map.toString();
    }
+    
+    public void setAuthor(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setCategory(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setCopyright(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setCreator(String pubdate) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setDescription(String description) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setDocs(String guid) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setGuid(String guid) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setLanguage(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setLink(String link) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setPubDate(String pubdate) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setReferrer(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setSize(long size) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setSizename(String sizename) {
+        // TODO not implemented: the argument is ignored
+        
+    }
+
+    public void setTitle(String title) {
+        // TODO not implemented: the argument is ignored
+        
+    }
 }
--- a/source/net/yacy/document/parser/xml/RSSReader.java
+++ b/source/net/yacy/document/parser/xml/RSSReader.java
@ -1,30 +1,24 @@
-// RSSReader.java
-// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
-// first published 16.07.2007 on http://yacy.net
-//
-// This is a part of YaCy, a peer-to-peer based web search engine
-//
-// $LastChangedDate$
-// $LastChangedRevision$
-// $LastChangedBy$
-//
-// LICENSE
-// 
-// This program is free software; you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation; either version 2 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+/**
+ *  RSSReader
+ *  Copyright 2007 by Michael Peter Christen
+ *  First released 16.7.2007 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */

-package net.yacy.document.parser.xml;
+package net.yacy.cora.document;

 import java.io.ByteArrayInputStream;
 import java.io.IOException;
@ -34,10 +28,6 @@ import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;

-import net.yacy.document.content.RSSMessage;
-import net.yacy.kelondro.logging.Log;
-import net.yacy.kelondro.util.ByteBuffer;
-
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@ -86,25 +76,21 @@ public class RSSReader extends DefaultHandler {
        }
    }
    
-    public static RSSReader parse(final byte[] a) {
+    public static RSSReader parse(final byte[] a) throws IOException {

        // check integrity of array
        if ((a == null) || (a.length == 0)) {
-            Log.logWarning("rssReader", "response=null");
-            return null;
+            throw new IOException("response=null");
        }
        if (a.length < 100) {
-            Log.logWarning("rssReader", "response=" + new String(a));
-            return null;
+            throw new IOException("response=" + new String(a));
        }
-        if (!ByteBuffer.equals(a, "<?xml".getBytes())) {
-            Log.logWarning("rssReader", "response does not contain valid xml");
-            return null;
+        if (!equals(a, "<?xml".getBytes())) {
+            throw new IOException("response does not contain valid xml");
        }
        final String end = new String(a, a.length - 10, 10);
        if (end.indexOf("rss") < 0) {
-            Log.logWarning("rssReader", "response incomplete");
-            return null;
+            throw new IOException("response incomplete");
        }
        
        // make input stream
@ -115,13 +101,18 @@ public class RSSReader extends DefaultHandler {
        try {
            reader = new RSSReader(bais);
        } catch (final Exception e) {
-            Log.logException(e);
-            Log.logWarning("rssReader", "parse exception: " + e.getMessage(), e);
-            return null;
+            throw new IOException("parse exception: " + e.getMessage(), e);
        }
        try { bais.close(); } catch (final IOException e) {}
        return reader;
    }
+    
+    private final static boolean equals(final byte[] buffer, final byte[] pattern) {
+        // prefix comparison: true only if buffer starts with all bytes of pattern
+        int matched = 0;
+        while (matched < buffer.length && matched < pattern.length && buffer[matched] == pattern[matched]) matched++;
+        return matched == pattern.length;
+    }

    @Override
    public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException {
--- a/source/net/yacy/cora/protocol/HttpConnector.java
+++ b/source/net/yacy/cora/protocol/HttpConnector.java
@ -0,0 +1,90 @@
+/**
+ *  HttpConnector
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 25.05.2010 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.cora.protocol;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.httpclient.methods.multipart.Part;
+
+import de.anomic.crawler.retrieval.HTTPLoader;
+import de.anomic.http.client.Client;
+import de.anomic.http.client.RemoteProxyConfig;
+import de.anomic.http.server.HeaderFramework;
+import de.anomic.http.server.RequestHeader;
+import de.anomic.http.server.ResponseContainer;
+
+public class HttpConnector {
+    // static helpers that send multipart POST requests and return the response data
+    /**
+     * send data to the server named by vhost
+     * (delegates to the five-argument variant with gzipBody = false)
+     * @param url the URL that the POST request is sent to
+     * @param vhost name of the server which should respond; sent as the Host request header
+     * @param post data to send (name-value-pairs)
+     * @param timeout in milliseconds
+     * @return response body
+     * @throws IOException
+     */
+    public static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout) throws IOException {
+        return wput(url, vhost, post, timeout, false);
+    }
+    
+    /**
+     * send data to the server named by vhost
+     * 
+     * The request carries the YaCy user agent and goes through the proxy returned by
+     * proxyConfig(), which may be null.
+     * 
+     * @param url the URL that the POST request is sent to
+     * @param vhost name of the server which should respond; sent as the Host request header
+     * @param post data to send (name-value-pairs)
+     * @param timeout in milliseconds
+     * @param gzipBody send with content gzip encoded
+     * @return response body
+     * @throws IOException
+     */
+    public static byte[] wput(final String url, final String vhost, final List<Part> post, final int timeout, final boolean gzipBody) throws IOException {
+        final RequestHeader header = new RequestHeader();
+        header.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
+        header.put(HeaderFramework.HOST, vhost);
+        final Client client = new Client(timeout, header);
+        client.setProxy(proxyConfig());
+        
+        ResponseContainer res = null;
+        byte[] content = null;
+        try {
+            // send request/data and read the response data before the stream is closed below
+            res = client.POST(url, post, gzipBody);
+            content = res.getData();
+        } finally {
+            if(res != null) {
+                // release connection (also runs when reading the data failed)
+                res.closeStream();
+            }
+        }
+        return content;
+    }
+
+
+    /**
+     * proxy for "to YaCy connections"
+     * @return the remote proxy configuration if one exists and both useProxy() and useProxy4Yacy() hold, otherwise null
+     */
+    private static final RemoteProxyConfig proxyConfig() {
+        final RemoteProxyConfig p = RemoteProxyConfig.getRemoteProxyConfig();
+        return ((p != null) && (p.useProxy()) && (p.useProxy4Yacy())) ? p : null;
+    }
+}
--- a/source/net/yacy/cora/services/Search.java
+++ b/source/net/yacy/cora/services/Search.java
@ -0,0 +1,145 @@
+/**
+ *  Search
+ *  Copyright 2010 by Michael Peter Christen
+ *  First released 25.05.2010 at http://yacy.net
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file COPYING.LESSER.
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.cora.services;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSMessage;
+import net.yacy.cora.document.RSSReader;
+import net.yacy.cora.protocol.HttpConnector;
+
+import org.apache.commons.httpclient.methods.multipart.Part;
+import org.apache.commons.httpclient.methods.multipart.StringPart;
+
+public class Search {
+    // start a background searchJob; results arrive in the returned queue, terminated by RSSMessage.POISON
+    public static BlockingQueue<RSSMessage> search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
+        BlockingQueue<RSSMessage> queue = new LinkedBlockingQueue<RSSMessage>();
+        searchJob job = new searchJob(rssSearchServiceURL, query, verify, global, timeout, maximumRecords, queue);
+        job.start();
+        return queue;
+    }
+    // upper bound for the number of records requested from the peer in one round trip
+    private final static int recordsPerSession = 10;
+    // worker thread that pages through the result list and feeds the queue
+    public static class searchJob extends Thread {
+
+        String urlBase, query;
+        boolean verify, global;
+        long timeout;
+        int startRecord,  maximumRecords;
+        BlockingQueue<RSSMessage> queue;
+
+        public searchJob(String urlBase, String query, boolean verify, boolean global, long timeout, int maximumRecords, BlockingQueue<RSSMessage> queue) {
+            this.urlBase = urlBase;
+            this.query = query;
+            this.verify = verify;
+            this.global = global;
+            this.timeout = timeout;
+            this.startRecord = 0;
+            this.maximumRecords = maximumRecords;
+            this.queue = queue;
+        }
+
+        public void run() {
+            RSSMessage message;
+            mainloop: while (timeout > 0 && maximumRecords > 0) { // both fields are budgets that shrink with every page
+                long st = System.currentTimeMillis();
+                RSSFeed feed;
+                try {
+                    feed = search(urlBase, query, verify, global, timeout, startRecord, Math.min(recordsPerSession, maximumRecords)); // never ask for more than is still wanted
+                } catch (IOException e1) {
+                    break mainloop;
+                }
+                if (feed == null || feed.isEmpty()) break mainloop;
+                maximumRecords -= feed.size();
+                innerloop: while (!feed.isEmpty()) {
+                    message = feed.pollMessage();
+                    if (message == null) break innerloop;
+                    try {
+                        queue.put(message);
+                    } catch (InterruptedException e) {
+                        break mainloop; // interrupted: stop querying the peer instead of fetching further pages
+                    }
+                }
+                startRecord += recordsPerSession;
+                timeout -= System.currentTimeMillis() - st; // charge the time spent on this page against the overall budget
+            }
+            try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {} // end marker for the consumer; nothing left to do if interrupted here
+        }
+    }
+    
+    /**
+     * send a query to a yacy public search interface; every failure (bad url, transport error, unparseable response) is reported as IOException
+     * @param rssSearchServiceURL the target url base (everything before the ? that follows the SRU request syntax properties). NOTE(review): null was documented as "use the local peer", but the value is passed on unchanged - confirm before relying on it
+     * @param query the query as string
+     * @param startRecord number of first record
+     * @param maximumRecords maximum number of records
+     * @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
+     * @param global if true also search results from other peers are included
+     * @param timeout milliseconds that are waited at maximum for a search result
+     * @return the feed parsed from the response of the peer, never null
+     */
+    public static RSSFeed search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException {
+        MultiProtocolURI uri = null;
+        try {
+            uri = new MultiProtocolURI(rssSearchServiceURL);
+        } catch (MalformedURLException e) {
+            throw (IOException) new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage()).initCause(e); // keep the cause for diagnostics
+        }
+        
+        // prepare request
+        final List<Part> post = new ArrayList<Part>();
+        post.add(new StringPart("query", query, Charset.defaultCharset().name()));
+        post.add(new StringPart("startRecord", Integer.toString(startRecord), Charset.defaultCharset().name()));
+        post.add(new StringPart("maximumRecords", Integer.toString(maximumRecords), Charset.defaultCharset().name()));
+        post.add(new StringPart("verify", verify ? "true" : "false", Charset.defaultCharset().name()));
+        post.add(new StringPart("resource", global ? "global" : "local", Charset.defaultCharset().name()));
+        
+        // send request
+        try {
+            final byte[] result = HttpConnector.wput(rssSearchServiceURL, uri.getHost(), post, (int) timeout);
+            //String debug = new String(result); System.out.println("*** DEBUG: " + debug);
+            final RSSReader reader = RSSReader.parse(result);
+            if (reader == null) {
+                throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
+            }
+            final RSSFeed feed = reader.getFeed();
+            if (feed == null) {
+                // case where the rss reader does not understand the content
+                throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (2)");
+            }
+            return feed;
+        } catch (final IOException e) {
+            throw (IOException) new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString()).initCause(e); // same message as before, cause now attached
+        }
+    }
+    
+}
--- a/source/net/yacy/document/AbstractParser.java
+++ b/source/net/yacy/document/AbstractParser.java
@ -33,7 +33,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.workflow.WorkflowThread;

@ -108,7 +108,7 @@ public abstract class AbstractParser implements Idiom {
        return tempFile;
    }
    
-    public int parseDir(final DigestURI location, final String prefix, final File dir, final Document doc)
+    public int parseDir(final MultiProtocolURI location, final String prefix, final File dir, final Document doc)
            throws ParserException, InterruptedException, IOException {
        if (!dir.isDirectory())
            throw new ParserException("tried to parse ordinary file " + dir + " as directory", location);
@ -122,7 +122,7 @@ public abstract class AbstractParser implements Idiom {
            if (file.isDirectory()) {
                result += parseDir(location, prefix, file, doc);
            } else try {
-                final DigestURI url = DigestURI.newURL(location, "/" + prefix + "/"
+                final MultiProtocolURI url = MultiProtocolURI.newURL(location, "/" + prefix + "/"
                        // XXX: workaround for relative paths within document
                        + file.getPath().substring(file.getPath().indexOf(File.separatorChar) + 1)
                        + "/" + file.getName());
@ -151,7 +151,7 @@ public abstract class AbstractParser implements Idiom {
 	 * @see net.yacy.document.Idiom#parse(de.anomic.net.URL, java.lang.String, byte[])
 	 */
 	public Document parse(
-            final DigestURI location, 
+            final MultiProtocolURI location, 
            final String mimeType,
            final String charset,
            final byte[] source
@ -186,7 +186,7 @@ public abstract class AbstractParser implements Idiom {
 	 * @see net.yacy.document.Idiom#parse(de.anomic.net.URL, java.lang.String, java.io.File)
 	 */
 	public Document parse(
-            final DigestURI location, 
+            final MultiProtocolURI location, 
            final String mimeType,
            final String charset,
 			final File sourceFile
@ -220,7 +220,7 @@ public abstract class AbstractParser implements Idiom {
     * 
     * @see net.yacy.document.Idiom#parse(de.anomic.net.URL, java.lang.String, java.io.InputStream)
     */
-    public abstract Document parse(DigestURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
+    public abstract Document parse(MultiProtocolURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException;
    
    /**
     * Return the name of the parser
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@ -46,10 +46,10 @@ import java.util.Properties;
 import java.util.TreeMap;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.language.Identificator;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.data.word.WordReferenceRow;
 import net.yacy.kelondro.logging.Log;
@ -125,7 +125,7 @@ public final class Condenser {
        this.languageIdentificator = new Identificator();
        
        
-        Map.Entry<DigestURI, String> entry;
+        Map.Entry<MultiProtocolURI, String> entry;
        if (indexText) {
            createCondensement(document.getText());        
            // the phrase counter:
@ -179,7 +179,7 @@ public final class Condenser {
        if (indexMedia) {
            // add anchor descriptions: here, we also add the url components
            // audio
-            Iterator<Map.Entry<DigestURI, String>> i = document.getAudiolinks().entrySet().iterator();
+            Iterator<Map.Entry<MultiProtocolURI, String>> i = document.getAudiolinks().entrySet().iterator();
            while (i.hasNext()) {
                entry = i.next();
                insertTextToWords(entry.getKey().toNormalform(false, false), 99, flag_cat_hasaudio, RESULT_FLAGS, false);
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@ -45,9 +45,9 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.DateFormatter;
 import net.yacy.kelondro.util.FileUtils;
@ -55,7 +55,7 @@ import net.yacy.kelondro.util.FileUtils;

 public class Document {
    
-    private final DigestURI source;             // the source url
+    private final MultiProtocolURI source;             // the source url
    private final String mimeType;              // mimeType as taken from http header
    private final String charset;               // the charset of the document
    private final List<String> keywords;        // most resources provide a keyword field
@ -65,24 +65,24 @@ public class Document {
    private final List<String>  sections;       // if present: more titles/headlines appearing in the document
    private final StringBuilder description;    // an abstract, if present: short content description
    private Object text;                        // the clear text, all that is visible
-    private final Map<DigestURI, String> anchors; // all links embedded as clickeable entities (anchor tags)
-    private final HashMap<String, ImageEntry> images; // all visible pictures in document
+    private final Map<MultiProtocolURI, String> anchors; // all links embedded as clickeable entities (anchor tags)
+    private final HashMap<MultiProtocolURI, ImageEntry> images; // all visible pictures in document
    // the anchors and images - Maps are URL-to-EntityDescription mappings.
    // The EntityDescription appear either as visible text in anchors or as alternative
    // text in image tags.
-    private Map<DigestURI, String> hyperlinks, audiolinks, videolinks, applinks;
+    private Map<MultiProtocolURI, String> hyperlinks, audiolinks, videolinks, applinks;
    private Map<String, String> emaillinks;
-    private DigestURI favicon;
+    private MultiProtocolURI favicon;
    private boolean resorted;
    private InputStream textStream;
    private int inboundLinks, outboundLinks; // counters for inbound and outbound links, are counted after calling notifyWebStructure
    private Set<String> languages;
    private boolean indexingDenied;
    
-    public Document(final DigestURI location, final String mimeType, final String charset, final Set<String> languages,
+    public Document(final MultiProtocolURI location, final String mimeType, final String charset, final Set<String> languages,
                    final String[] keywords, final String title, final String author, final String publisher,
                    final String[] sections, final String abstrct,
-                    final Object text, final Map<DigestURI, String> anchors, final HashMap<String, ImageEntry> images,
+                    final Object text, final Map<MultiProtocolURI, String> anchors, final HashMap<MultiProtocolURI, ImageEntry> images,
                    boolean indexingDenied) {
        this.source = location;
        this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
@ -92,8 +92,8 @@ public class Document {
        this.creator = (author == null) ? new StringBuilder(0) : new StringBuilder(author);
        this.sections = (sections == null) ? new LinkedList<String>() : Arrays.asList(sections);
        this.description = (abstrct == null) ? new StringBuilder(0) : new StringBuilder(abstrct);
-        this.anchors = (anchors == null) ? new HashMap<DigestURI, String>(0) : anchors;
-        this.images =  (images == null) ? new HashMap<String, ImageEntry>() : images;
+        this.anchors = (anchors == null) ? new HashMap<MultiProtocolURI, String>(0) : anchors;
+        this.images =  (images == null) ? new HashMap<MultiProtocolURI, ImageEntry>() : images;
        this.publisher = publisher;
        this.hyperlinks = null;
        this.audiolinks = null;
@ -159,7 +159,7 @@ dc_rights
     */
    
    public String dc_title() {
-        return title.toString();
+        return (title == null) ? "" : title.toString();
    }

    public void setTitle(String title) {
@ -167,9 +167,7 @@ dc_rights
    }
    
    public String dc_creator() {
-        if (creator == null)
-            return "";
-        return creator.toString();
+        return (creator == null) ? "" : creator.toString();
    }
    
    public String dc_subject(final char separator) {
@ -196,7 +194,7 @@ dc_rights
    }
    
    public String dc_publisher() {
-        return this.publisher;
+        return this.publisher == null ? "" : this.publisher;
    }
    
    public String dc_format() {
@ -207,7 +205,7 @@ dc_rights
        return this.source.toNormalform(true, false);
    }
    
-    public DigestURI dc_source() {
+    public MultiProtocolURI dc_source() {
        return this.source;
    }
    
@ -282,7 +280,7 @@ dc_rights
        return this.keywords;
    }
    
-    public Map<DigestURI, String> getAnchors() {
+    public Map<MultiProtocolURI, String> getAnchors() {
        // returns all links embedded as anchors (clickeable entities)
        // this is a url(String)/text(String) map
        return anchors;
@ -291,30 +289,30 @@ dc_rights
    
    // the next three methods provide a calculated view on the getAnchors/getImages:
    
-    public Map<DigestURI, String> getHyperlinks() {
+    public Map<MultiProtocolURI, String> getHyperlinks() {
        // this is a subset of the getAnchor-set: only links to other hyperrefs
        if (!resorted) resortLinks();
        return hyperlinks;
    }
    
-    public Map<DigestURI, String> getAudiolinks() {
+    public Map<MultiProtocolURI, String> getAudiolinks() {
        if (!resorted) resortLinks();
        return this.audiolinks;
    }
    
-    public Map<DigestURI, String> getVideolinks() {
+    public Map<MultiProtocolURI, String> getVideolinks() {
        if (!resorted) resortLinks();
        return this.videolinks;
    }
    
-    public HashMap<String, ImageEntry> getImages() {
+    public HashMap<MultiProtocolURI, ImageEntry> getImages() {
        // returns all links enbedded as pictures (visible in document)
        // this resturns a htmlFilterImageEntry collection
        if (!resorted) resortLinks();
        return images;
    }
    
-    public Map<DigestURI, String> getApplinks() {
+    public Map<MultiProtocolURI, String> getApplinks() {
        if (!resorted) resortLinks();
        return this.applinks;
    }
@ -329,18 +327,18 @@ dc_rights
        if (this.resorted) return;
        
        // extract hyperlinks, medialinks and emaillinks from anchorlinks
-        DigestURI url;
+        MultiProtocolURI url;
        String u;
        int extpos, qpos;
        String ext = null;
-        final Iterator<Map.Entry<DigestURI, String>> i = anchors.entrySet().iterator();
-        hyperlinks = new HashMap<DigestURI, String>();
-        videolinks = new HashMap<DigestURI, String>();
-        audiolinks = new HashMap<DigestURI, String>();
-        applinks   = new HashMap<DigestURI, String>();
+        final Iterator<Map.Entry<MultiProtocolURI, String>> i = anchors.entrySet().iterator();
+        hyperlinks = new HashMap<MultiProtocolURI, String>();
+        videolinks = new HashMap<MultiProtocolURI, String>();
+        audiolinks = new HashMap<MultiProtocolURI, String>();
+        applinks   = new HashMap<MultiProtocolURI, String>();
        emaillinks = new HashMap<String, String>();
-        final HashMap<String, ImageEntry> collectedImages = new HashMap<String, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
-        Map.Entry<DigestURI, String> entry;
+        final HashMap<MultiProtocolURI, ImageEntry> collectedImages = new HashMap<MultiProtocolURI, ImageEntry>(); // this is a set that is collected now and joined later to the imagelinks
+        Map.Entry<MultiProtocolURI, String> entry;
        while (i.hasNext()) {
            entry = i.next();
            url = entry.getKey();
@ -393,21 +391,21 @@ dc_rights
        this.resorted = true;
    }
    
-    public static Map<DigestURI, String> allSubpaths(final Collection<?> links) {
+    public static Map<MultiProtocolURI, String> allSubpaths(final Collection<?> links) {
        // links is either a Set of Strings (urls) or a Set of
        // htmlFilterImageEntries
        final HashSet<String> h = new HashSet<String>();
        Iterator<?> i = links.iterator();
        Object o;
-        DigestURI url;
+        MultiProtocolURI url;
        String u;
        int pos;
        int l;
        while (i.hasNext())
            try {
                o = i.next();
-                if (o instanceof DigestURI) url = (DigestURI) o;
-                else if (o instanceof String) url = new DigestURI((String) o, null);
+                if (o instanceof MultiProtocolURI) url = (MultiProtocolURI) o;
+                else if (o instanceof String) url = new MultiProtocolURI((String) o);
                else if (o instanceof ImageEntry) url = ((ImageEntry) o).url();
                else {
                    assert false;
@ -428,11 +426,11 @@ dc_rights
            } catch (final MalformedURLException e) { }
        // now convert the strings to yacyURLs
        i = h.iterator();
-        final HashMap<DigestURI, String> v = new HashMap<DigestURI, String>();
+        final HashMap<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
        while (i.hasNext()) {
            u = (String) i.next();
            try {
-                url = new DigestURI(u, null);
+                url = new MultiProtocolURI(u);
                v.put(url, "sub");
            } catch (final MalformedURLException e) {
            }
@ -440,23 +438,23 @@ dc_rights
        return v;
    }
    
-    public static Map<DigestURI, String> allReflinks(final Collection<?> links) {
+    public static Map<MultiProtocolURI, String> allReflinks(final Collection<?> links) {
        // links is either a Set of Strings (with urls) or
        // htmlFilterImageEntries
        // we find all links that are part of a reference inside a url
-        final HashMap<DigestURI, String> v = new HashMap<DigestURI, String>();
+        final HashMap<MultiProtocolURI, String> v = new HashMap<MultiProtocolURI, String>();
        final Iterator<?> i = links.iterator();
        Object o;
-        DigestURI url;
+        MultiProtocolURI url;
        String u;
        int pos;
        loop: while (i.hasNext())
            try {
                o = i.next();
-                if (o instanceof DigestURI)
-                    url = (DigestURI) o;
+                if (o instanceof MultiProtocolURI)
+                    url = (MultiProtocolURI) o;
                else if (o instanceof String)
-                    url = new DigestURI((String) o, null);
+                    url = new MultiProtocolURI((String) o);
                else if (o instanceof ImageEntry)
                    url = ((ImageEntry) o).url();
                else {
@ -469,7 +467,7 @@ dc_rights
                    u = u.substring(pos);
                    while ((pos = u.toLowerCase().indexOf("http://", 7)) > 0)
                        u = u.substring(pos);
-                    url = new DigestURI(u, null);
+                    url = new MultiProtocolURI(u);
                    if (!(v.containsKey(url)))
                        v.put(url, "ref");
                    continue loop;
@ -479,7 +477,7 @@ dc_rights
                    u = "http:/" + u.substring(pos);
                    while ((pos = u.toLowerCase().indexOf("/www.", 7)) > 0)
                        u = "http:/" + u.substring(pos);
-                    url = new DigestURI(u, null);
+                    url = new MultiProtocolURI(u);
                    if (!(v.containsKey(url)))
                        v.put(url, "ref");
                    continue loop;
@ -512,14 +510,14 @@ dc_rights
    /**
     * @return the {@link URL} to the favicon that belongs to the document
     */
-    public DigestURI getFavicon() {
+    public MultiProtocolURI getFavicon() {
    	return this.favicon;
    }
    
    /**
     * @param faviconURL the {@link URL} to the favicon that belongs to the document
     */
-    public void setFavicon(final DigestURI faviconURL) {
+    public void setFavicon(final MultiProtocolURI faviconURL) {
    	this.favicon = faviconURL;
    }
    
--- a/source/net/yacy/document/Idiom.java
+++ b/source/net/yacy/document/Idiom.java
@ -29,7 +29,7 @@ import java.io.File;
 import java.io.InputStream;
 import java.util.Set;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;


 /**
@ -51,7 +51,7 @@ public interface Idiom {
     *  
     * @throws ParserException if the content could not be parsed properly 
     */
-    public Document parse(DigestURI location, String mimeType, String charset, byte[] source)
+    public Document parse(MultiProtocolURI location, String mimeType, String charset, byte[] source)
    throws ParserException, InterruptedException;
    
    /**
@ -65,7 +65,7 @@ public interface Idiom {
     *  
     * @throws ParserException if the content could not be parsed properly 
     */    
-    public Document parse(DigestURI location, String mimeType, String charset, File sourceFile)
+    public Document parse(MultiProtocolURI location, String mimeType, String charset, File sourceFile)
    throws ParserException, InterruptedException;
    
    /**
@ -79,7 +79,7 @@ public interface Idiom {
     *  
     * @throws ParserException if the content could not be parsed properly 
     */    
-    public Document parse(DigestURI location, String mimeType, String charset, InputStream source) 
+    public Document parse(MultiProtocolURI location, String mimeType, String charset, InputStream source) 
    throws ParserException, InterruptedException;
            
    /**
--- a/source/net/yacy/document/ParserException.java
+++ b/source/net/yacy/document/ParserException.java
@ -24,10 +24,10 @@

 package net.yacy.document;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;

 public class ParserException extends Exception {
-    private DigestURI url = null;
+    private MultiProtocolURI url = null;
    
 	private static final long serialVersionUID = 1L;

@ -35,12 +35,12 @@ public class ParserException extends Exception {
        super();
    }

-    public ParserException(final String message, final DigestURI url) {
+    public ParserException(final String message, final MultiProtocolURI url) {
        super(message + "; url = " + url.toNormalform(true, false));
        this.url = url;
    }
    
-    public DigestURI getURL() {
+    public MultiProtocolURI getURL() {
        return this.url;
    }
 }
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@ -40,6 +40,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.bzipParser;
 import net.yacy.document.parser.csvParser;
 import net.yacy.document.parser.docParser;
@ -61,7 +62,6 @@ import net.yacy.document.parser.vsdParser;
 import net.yacy.document.parser.xlsParser;
 import net.yacy.document.parser.zipParser;
 import net.yacy.document.parser.images.genericImageParser;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;

@ -138,7 +138,7 @@ public final class TextParser {
    }
    
    public static Document parseSource(
-            final DigestURI location,
+            final MultiProtocolURI location,
            final String mimeType,
            final String charset,
            final File sourceFile
@ -167,7 +167,7 @@ public final class TextParser {
    }
    
    public static Document parseSource(
-            final DigestURI location,
+            final MultiProtocolURI location,
            String mimeType,
            final String charset,
            final byte[] content
@ -176,7 +176,7 @@ public final class TextParser {
    }
    
    public static Document parseSource(
-            final DigestURI location,
+            final MultiProtocolURI location,
            String mimeType,
            final String charset,
            final long contentLength,
@ -211,7 +211,7 @@ public final class TextParser {
    }

    private static Document parseSource(
-            final DigestURI location,
+            final MultiProtocolURI location,
            String mimeType,
            Idiom idiom,
            final String charset,
@ -233,7 +233,7 @@ public final class TextParser {
    }

    private static Document parseSource(
-            final DigestURI location,
+            final MultiProtocolURI location,
            String mimeType,
            List<Idiom> idioms,
            final String charset,
@ -280,7 +280,7 @@ public final class TextParser {
     * @param mimeType
     * @return returns null if the content is supported. If the content is not supported, return a error string.
     */
-    public static String supports(final DigestURI url, String mimeType) {
+    public static String supports(final MultiProtocolURI url, String mimeType) {
        try {
            // try to get a parser. If this works, we don't need the parser itself, we just return null to show that everything is ok.
            List<Idiom> idioms = idiomParser(url, mimeType);
@ -304,7 +304,7 @@ public final class TextParser {
     * @return a list of Idiom parsers that may be appropriate for the given criteria
     * @throws ParserException
     */
-    private static List<Idiom> idiomParser(final DigestURI url, String mimeType1) throws ParserException {
+    private static List<Idiom> idiomParser(final MultiProtocolURI url, String mimeType1) throws ParserException {
        List<Idiom> idioms = new ArrayList<Idiom>(2);
        
        // check extension
@ -345,7 +345,7 @@ public final class TextParser {
        return null;
    }
    
-    public static String supportsExtension(final DigestURI url) {
+    public static String supportsExtension(final MultiProtocolURI url) {
        String ext = url.getFileExtension().toLowerCase();
        if (ext == null || ext.length() == 0) return null;
        if (denyExtensionx.containsKey(ext)) return "file extension '" + ext + "' is denied (2)";
@ -357,7 +357,7 @@ public final class TextParser {
        return null;
    }
    
-    public static String mimeOf(DigestURI url) {
+    public static String mimeOf(MultiProtocolURI url) {
        return mimeOf(url.getFileExtension());
    }
    
--- a/source/net/yacy/document/parser/bzipParser.java
+++ b/source/net/yacy/document/parser/bzipParser.java
@ -33,12 +33,12 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.FileUtils;

 import org.apache.tools.bzip2.CBZip2InputStream;
@ -75,7 +75,7 @@ public class bzipParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        File tempFile = null;
        try {           
--- a/source/net/yacy/document/parser/csvParser.java
+++ b/source/net/yacy/document/parser/csvParser.java
@ -37,11 +37,11 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;

 /**
 * a parser for comma-separated values
@ -73,7 +73,7 @@ public class csvParser extends AbstractParser implements Idiom {
    }
    
    @Override
-    public Document parse(DigestURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
+    public Document parse(MultiProtocolURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
        // construct a document using all cells of the document
        // the first row is used as headline
        // all lines are artificially terminated by a '.' to separate them as sentence for the condenser.
@ -112,7 +112,7 @@ public class csvParser extends AbstractParser implements Idiom {
        return sb.toString();
    }
    
-    public List<String[]> getTable(DigestURI location, String mimeType, String charset, InputStream source) {
+    public List<String[]> getTable(MultiProtocolURI location, String mimeType, String charset, InputStream source) {
        ArrayList<String[]> rows = new ArrayList<String[]>();
        BufferedReader reader;
        try {
--- a/source/net/yacy/document/parser/docParser.java
+++ b/source/net/yacy/document/parser/docParser.java
@ -32,11 +32,11 @@ import java.io.UnsupportedEncodingException;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;

 import org.apache.poi.hwpf.extractor.WordExtractor;

@ -65,7 +65,7 @@ public class docParser extends AbstractParser implements Idiom {
 		super("Word Document Parser");
 	}

-	public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+	public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {

        final WordExtractor extractor;

--- a/source/net/yacy/document/parser/gzipParser.java
+++ b/source/net/yacy/document/parser/gzipParser.java
@ -34,12 +34,12 @@ import java.util.HashSet;
 import java.util.Set;
 import java.util.zip.GZIPInputStream;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.FileUtils;


@ -74,7 +74,7 @@ public class gzipParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        File tempFile = null;
        try {           
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@ -44,8 +44,8 @@ import java.util.Properties;

 import javax.swing.event.EventListenerList;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.parser.htmlParser;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
@ -79,8 +79,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    }

    // class variables: collectors for links
-    private HashMap<DigestURI, String> anchors;
-    private HashMap<String, ImageEntry> images; // urlhash/image relation
+    private HashMap<MultiProtocolURI, String> anchors;
+    private HashMap<MultiProtocolURI, ImageEntry> images; // url/image relation (keyed by the image URL, no longer by url hash)
    private final HashMap<String, String> metas;
    private String title;
    //private String headline;
@ -89,23 +89,23 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    private final EventListenerList htmlFilterEventListeners;
    
    /**
-     * {@link DigestURI} to the favicon that belongs to the document
+     * {@link MultiProtocolURI} to the favicon that belongs to the document
     */
-    private DigestURI favicon;
+    private MultiProtocolURI favicon;
    
    /**
-     * The document root {@link DigestURI} 
+     * The document root {@link MultiProtocolURI} 
     */
-    private DigestURI root;
+    private MultiProtocolURI root;

    @SuppressWarnings("unchecked")
-    public ContentScraper(final DigestURI root) {
+    public ContentScraper(final MultiProtocolURI root) {
        // the root value here will not be used to load the resource.
        // it is only the reference for relative links
        super(linkTags0, linkTags1);
        this.root = root;
-        this.anchors = new HashMap<DigestURI, String>();
-        this.images = new HashMap<String, ImageEntry>();
+        this.anchors = new HashMap<MultiProtocolURI, String>();
+        this.images = new HashMap<MultiProtocolURI, ImageEntry>();
        this.metas = new HashMap<String, String>();
        this.title = "";
        this.headlines = new ArrayList[4];
@ -133,9 +133,9 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        if (b.length() != 0) content.append(b).append(32);
    }

-    private DigestURI absolutePath(final String relativePath) {
+    private MultiProtocolURI absolutePath(final String relativePath) {
        try {
-            return DigestURI.newURL(root, relativePath);
+            return MultiProtocolURI.newURL(root, relativePath);
        } catch (final Exception e) {
            return null;
        }
@ -149,7 +149,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                if (width > 15 && height > 15) {
                    final float ratio = (float) Math.min(width, height) / Math.max(width, height);
                    if (ratio > 0.4) {
-                        final DigestURI url = absolutePath(tagopts.getProperty("src", ""));
+                        final MultiProtocolURI url = absolutePath(tagopts.getProperty("src", ""));
                        final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height, -1);
                        addImage(images, ie);
                    }
@ -162,7 +162,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            } catch (final NumberFormatException e) {}
        }
        if (tagname.equalsIgnoreCase("base")) try {
-            root = new DigestURI(tagopts.getProperty("href", ""), null);
+            root = new MultiProtocolURI(tagopts.getProperty("href", ""));
        } catch (final MalformedURLException e) {}
        if (tagname.equalsIgnoreCase("frame")) {
            anchors.put(absolutePath(tagopts.getProperty("src", "")), tagopts.getProperty("name",""));
@ -185,7 +185,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            if (href.length() > 0) anchors.put(absolutePath(href), areatitle);
        }
        if (tagname.equalsIgnoreCase("link")) {
-            final DigestURI newLink = absolutePath(tagopts.getProperty("href", ""));
+            final MultiProtocolURI newLink = absolutePath(tagopts.getProperty("href", ""));

            if (newLink != null) {
                final String type = tagopts.getProperty("rel", "");
@ -193,7 +193,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {

                if (type.equalsIgnoreCase("shortcut icon")) {
                    final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1);
-                    images.put(new String(ie.url().hash()), ie);    
+                    images.put(ie.url(), ie);    
                    this.favicon = newLink;
                } else if (!type.equalsIgnoreCase("stylesheet") && !type.equalsIgnoreCase("alternate stylesheet")) {
                    anchors.put(newLink, linktitle);
@ -220,7 +220,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        // System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + new String(text));
        if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
            final String href = tagopts.getProperty("href", "");
-            DigestURI url;
+            MultiProtocolURI url;
            if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
                final String f = url.getFile();
                final int p = f.lastIndexOf('.');
@ -350,7 +350,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        }
    }

-    public Map<DigestURI, String> getAnchors() {
+    public Map<MultiProtocolURI, String> getAnchors() {
        // returns a url (MultiProtocolURI) / name (String) relation
        return anchors;
    }
@ -359,7 +359,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     * get all images
     * @return a map of <url, ImageEntry>, keyed by the image's MultiProtocolURI
     */
-    public HashMap<String, ImageEntry> getImages() {
+    public HashMap<MultiProtocolURI, ImageEntry> getImages() {
        // this returns a MultiProtocolURI (image url) / ImageEntry relation
        return images;
    }
@ -369,9 +369,9 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    }
    
    /**
-     * @return the {@link DigestURI} to the favicon that belongs to the document
+     * @return the {@link MultiProtocolURI} to the favicon that belongs to the document
     */    
-    public DigestURI getFavicon() {
+    public MultiProtocolURI getFavicon() {
        return this.favicon;
    }

@ -442,7 +442,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        if (s == null) s = metas.get("dc.description");
        if (s == null) s = "";
        if (s.length() == 0) {
-            return DigestURI.splitpattern.split(getTitle().toLowerCase());
+            return MultiProtocolURI.splitpattern.split(getTitle().toLowerCase());
        }
        if (s.contains(",")) return s.split(" |,");
        if (s.contains(";")) return s.split(" |;");
@ -536,32 +536,32 @@ public class ContentScraper extends AbstractScraper implements Scraper {
        if (page == null) throw new IOException("no content in file " + file.toString());
        
        // scrape document to look up charset
-        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page),"UTF-8",new DigestURI("http://localhost", null),null,false);
+        final ScraperInputStream htmlFilter = new ScraperInputStream(new ByteArrayInputStream(page),"UTF-8", new MultiProtocolURI("http://localhost"),null,false);
        final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
        
        // scrape content
-        final ContentScraper scraper = new ContentScraper(new DigestURI("http://localhost", null));
+        final ContentScraper scraper = new ContentScraper(new MultiProtocolURI("http://localhost"));
        final Writer writer = new TransformerWriter(null, null, scraper, null, false);
        FileUtils.copy(new ByteArrayInputStream(page), writer, Charset.forName(charset));
        
        return scraper;
    }
    
-    public static void addAllImages(final HashMap<String, ImageEntry> a, final HashMap<String, ImageEntry> b) {
-        final Iterator<Map.Entry<String, ImageEntry>> i = b.entrySet().iterator();
-        Map.Entry<String, ImageEntry> ie;
+    public static void addAllImages(final HashMap<MultiProtocolURI, ImageEntry> a, final HashMap<MultiProtocolURI, ImageEntry> b) {
+        final Iterator<Map.Entry<MultiProtocolURI, ImageEntry>> i = b.entrySet().iterator();
+        Map.Entry<MultiProtocolURI, ImageEntry> ie;
        while (i.hasNext()) {
            ie = i.next();
            addImage(a, ie.getValue());
        }
    }
    
-    public static void addImage(final HashMap<String, ImageEntry> a, final ImageEntry ie) {
-        if (a.containsKey(new String(ie.url().hash()))) {
+    public static void addImage(final HashMap<MultiProtocolURI, ImageEntry> a, final ImageEntry ie) {
+        if (a.containsKey(ie.url())) {
            // in case of a collision, take that image that has the better image size tags
-            if ((ie.height() > 0) && (ie.width() > 0)) a.put(new String(ie.url().hash()), ie);
+            if ((ie.height() > 0) && (ie.width() > 0)) a.put(ie.url(), ie);
        } else {
-            a.put(new String(ie.url().hash()), ie);
+            a.put(ie.url(), ie);
        }
    }
    
--- a/source/net/yacy/document/parser/html/ImageEntry.java
+++ b/source/net/yacy/document/parser/html/ImageEntry.java
@ -26,16 +26,16 @@ package net.yacy.document.parser.html;

 import java.util.Comparator;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;

 public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry> {

-    private final DigestURI url;
+    private final MultiProtocolURI url;
    private final String alt;
    private final int width, height;
    private final long fileSize;
    
-    public ImageEntry(final DigestURI url, final String alt, final int width, final int height, long fileSize) {
+    public ImageEntry(final MultiProtocolURI url, final String alt, final int width, final int height, long fileSize) {
        this.url = url;
        this.alt = alt;
        this.width = width;
@ -43,7 +43,7 @@ public class ImageEntry implements Comparable<ImageEntry>, Comparator<ImageEntry
        this.fileSize = fileSize;
    }

-    public DigestURI url() {
+    public MultiProtocolURI url() {
        return this.url;
    }
    
--- a/source/net/yacy/document/parser/html/ScraperInputStream.java
+++ b/source/net/yacy/document/parser/html/ScraperInputStream.java
@ -35,7 +35,7 @@ import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.util.Properties;

-import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.cora.document.MultiProtocolURI;


 public class ScraperInputStream extends InputStream implements ScraperListener {
@ -58,7 +58,7 @@ public class ScraperInputStream extends InputStream implements ScraperListener {
    public ScraperInputStream(
            final InputStream inStream,
            final String inputStreamCharset,
-            final DigestURI rooturl,
+            final MultiProtocolURI rooturl,
            final Transformer transformer,
            final boolean passbyIfBinarySuspect
    ) {
--- a/source/net/yacy/document/parser/htmlParser.java
+++ b/source/net/yacy/document/parser/htmlParser.java
@ -34,6 +34,7 @@ import java.nio.charset.UnsupportedCharsetException;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
@ -41,7 +42,6 @@ import net.yacy.document.ParserException;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ScraperInputStream;
 import net.yacy.document.parser.html.TransformerWriter;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.FileUtils;


@ -84,7 +84,7 @@ public class htmlParser extends AbstractParser implements Idiom {
    
    @Override
    public Document parse(
-            final DigestURI location, 
+            final MultiProtocolURI location, 
            final String mimeType, 
            final String documentCharset, 
            final InputStream sourceStream) throws ParserException, InterruptedException {
@ -136,7 +136,7 @@ public class htmlParser extends AbstractParser implements Idiom {
        return transformScraper(location, mimeType, documentCharset, scraper);
    }

-    private static Document transformScraper(final DigestURI location, final String mimeType, final String charSet, final ContentScraper scraper) {
+    private static Document transformScraper(final MultiProtocolURI location, final String mimeType, final String charSet, final ContentScraper scraper) {
        final String[] sections = new String[scraper.getHeadlines(1).length + scraper.getHeadlines(2).length + scraper.getHeadlines(3).length + scraper.getHeadlines(4).length];
        int p = 0;
        for (int i = 1; i <= 4; i++) for (int j = 0; j < scraper.getHeadlines(i).length; j++) sections[p++] = scraper.getHeadlines(i)[j];
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@ -50,13 +50,13 @@ import com.sun.image.codec.jpeg.JPEGCodec;
 import com.sun.image.codec.jpeg.JPEGDecodeParam;
 import com.sun.image.codec.jpeg.JPEGImageDecoder;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
 import net.yacy.document.parser.html.ImageEntry;
 import net.yacy.document.parser.images.bmpParser.IMAGEMAP;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;

@ -88,7 +88,7 @@ public class genericImageParser extends AbstractParser implements Idiom {
    @SuppressWarnings("unchecked")
    @Override
    public Document parse(
-            final DigestURI location, 
+            final MultiProtocolURI location, 
            final String mimeType, 
            final String documentCharset, 
            final InputStream sourceStream) throws ParserException, InterruptedException {
@ -170,11 +170,11 @@ public class genericImageParser extends AbstractParser implements Idiom {
        }        
        
        final HashSet<String> languages = new HashSet<String>();
-        final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
-        final HashMap<String, ImageEntry> images  = new HashMap<String, ImageEntry>();
+        final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+        final HashMap<MultiProtocolURI, ImageEntry> images  = new HashMap<MultiProtocolURI, ImageEntry>();
        // add this image to the map of images
        String infoString = ii.info.toString();
-        images.put(infoString, new ImageEntry(location, "", ii.width, ii.height, -1));
+        images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
        
        if (title == null) title = location.toNormalform(true, true);
        
@ -204,7 +204,7 @@ public class genericImageParser extends AbstractParser implements Idiom {
    }
    
    public static ImageInfo parseJavaImage(
-                            final DigestURI location,
+                            final MultiProtocolURI location,
                            final InputStream sourceStream) throws ParserException {
        BufferedImage image = null;
        try {
@ -222,7 +222,7 @@ public class genericImageParser extends AbstractParser implements Idiom {
    }
    
    public static ImageInfo parseJavaImage(
-                            final DigestURI location,
+                            final MultiProtocolURI location,
                            final BufferedImage image) {
        ImageInfo ii = new ImageInfo(location);
        ii.image = image;
@ -259,12 +259,12 @@ public class genericImageParser extends AbstractParser implements Idiom {
    }
    
    public static class ImageInfo {
-        public DigestURI location;
+        public MultiProtocolURI location;
        public BufferedImage image;
        public StringBuilder info;
        public int height;
        public int width;
-        public ImageInfo(final DigestURI location) {
+        public ImageInfo(final MultiProtocolURI location) {
            this.location = location;
            this.image = null;
            this.info = new StringBuilder();
@ -278,9 +278,9 @@ public class genericImageParser extends AbstractParser implements Idiom {
    public static void main(final String[] args) {
        File image = new File(args[0]);
        genericImageParser parser = new genericImageParser();
-        DigestURI uri;
+        MultiProtocolURI uri;
        try {
-            uri = new DigestURI("http://localhost/" + image.getName());
+            uri = new MultiProtocolURI("http://localhost/" + image.getName());
            Document document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", new FileInputStream(image));
            System.out.println(document.toString());
        } catch (MalformedURLException e) {
--- a/source/net/yacy/document/parser/odtParser.java
+++ b/source/net/yacy/document/parser/odtParser.java
@ -39,13 +39,13 @@ import java.util.zip.ZipFile;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
 import net.yacy.document.parser.xml.ODContentHandler;
 import net.yacy.document.parser.xml.ODMetaHandler;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.util.FileUtils;

@ -106,7 +106,7 @@ public class odtParser extends AbstractParser implements Idiom {
    }
    
    @Override
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final File dest) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final File dest) throws ParserException, InterruptedException {
        
        Writer writer = null;
        File writerFile = null;
@ -228,7 +228,7 @@ public class odtParser extends AbstractParser implements Idiom {
        }
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        File dest = null;
        try {
            // creating a tempfile
--- a/source/net/yacy/document/parser/ooxmlParser.java
+++ b/source/net/yacy/document/parser/ooxmlParser.java
@ -39,13 +39,13 @@ import java.util.zip.ZipFile;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
 import net.yacy.document.parser.xml.ODContentHandler;
 import net.yacy.document.parser.xml.ODMetaHandler;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
@ -90,7 +90,7 @@ public class ooxmlParser extends AbstractParser implements Idiom {
    }
    
    @Override
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final File dest) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final File dest) throws ParserException, InterruptedException {
        
        Writer writer = null;
        File writerFile = null;
@ -215,7 +215,7 @@ public class ooxmlParser extends AbstractParser implements Idiom {
        }
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        File dest = null;
        try {
            // creating a tempfile
--- a/source/net/yacy/document/parser/pdfParser.java
+++ b/source/net/yacy/document/parser/pdfParser.java
@ -44,11 +44,11 @@ import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
 import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
 import org.apache.pdfbox.util.PDFTextStripper;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;
@ -84,7 +84,7 @@ public class pdfParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        // create a pdf parser
        final PDDocument theDocument;
--- a/source/net/yacy/document/parser/pptParser.java
+++ b/source/net/yacy/document/parser/pptParser.java
@ -32,11 +32,11 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;

 import org.apache.poi.hslf.extractor.PowerPointExtractor;
@ -70,7 +70,7 @@ public class pptParser extends AbstractParser implements Idiom {
     * parses the source documents and returns a plasmaParserDocument containing
     * all extracted information about the parsed document
     */ 
-    public Document parse(final DigestURI location, final String mimeType,
+    public Document parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final InputStream source) throws ParserException,
            InterruptedException {
        try {
--- a/source/net/yacy/document/parser/psParser.java
+++ b/source/net/yacy/document/parser/psParser.java
@ -37,11 +37,11 @@ import java.io.InputStreamReader;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.FileUtils;


@ -104,7 +104,7 @@ public class psParser extends AbstractParser implements Idiom {
    
    
    @Override
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final File sourceFile) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final File sourceFile) throws ParserException, InterruptedException {
        
    	File outputFile = null;
        try { 
@ -277,7 +277,7 @@ public class psParser extends AbstractParser implements Idiom {
    	super.reset();
    }

-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        File tempFile = null;
        try {
--- a/source/net/yacy/document/parser/rssParser.java
+++ b/source/net/yacy/document/parser/rssParser.java
@ -40,18 +40,18 @@ import java.util.LinkedList;
 import java.util.Map;
 import java.util.Set;

+import net.yacy.cora.document.Hit;
+import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.document.RSSFeed;
+import net.yacy.cora.document.RSSReader;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.document.content.RSSMessage;
 import net.yacy.document.parser.html.AbstractScraper;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
 import net.yacy.document.parser.html.TransformerWriter;
-import net.yacy.document.parser.xml.RSSFeed;
-import net.yacy.document.parser.xml.RSSReader;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.io.CharBuffer;
 import net.yacy.kelondro.util.ByteBuffer;
 import net.yacy.kelondro.util.FileUtils;
@ -78,11 +78,11 @@ public class rssParser extends AbstractParser implements Idiom {
 		super("Rich Site Summary/Atom Feed Parser"); 
 	}

-	public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+	public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {

        final LinkedList<String> feedSections = new LinkedList<String>();
-        final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
-        final HashMap<String, ImageEntry> images  = new HashMap<String, ImageEntry>();
+        final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
+        final HashMap<MultiProtocolURI, ImageEntry> images  = new HashMap<MultiProtocolURI, ImageEntry>();
        final ByteBuffer text = new ByteBuffer();
        final CharBuffer authors = new CharBuffer();
        
@ -119,20 +119,20 @@ public class rssParser extends AbstractParser implements Idiom {
        
        if (feed.getImage() != null) {
            try {
-                DigestURI imgURL = new DigestURI(feed.getImage(), null);
-                images.put(new String(imgURL.hash()), new ImageEntry(imgURL, feedTitle, -1, -1, -1));
+                MultiProtocolURI imgURL = new MultiProtocolURI(feed.getImage());
+                images.put(imgURL, new ImageEntry(imgURL, feedTitle, -1, -1, -1));
            } catch (MalformedURLException e) {}
        }            
        
        // loop through the feed items
-        for (final RSSMessage item: feed) {
+        for (final Hit item: feed) {
                // check for interruption
                checkInterruption();
                
    			final String itemTitle = item.getTitle();
-                DigestURI itemURL = null;
+    			MultiProtocolURI itemURL = null;
                try {
-                    itemURL = new DigestURI(item.getLink(), null);
+                    itemURL = new MultiProtocolURI(item.getLink());
                } catch (MalformedURLException e) {
                    continue;
                }
@ -164,12 +164,12 @@ public class rssParser extends AbstractParser implements Idiom {
                        feedSections.add(itemHeadline);
                    }
                    
-                    final Map<DigestURI, String> itemLinks = scraper.getAnchors();
+                    final Map<MultiProtocolURI, String> itemLinks = scraper.getAnchors();
                    if (itemLinks != null && !itemLinks.isEmpty()) {
                        anchors.putAll(itemLinks);
                    }
                    
-                    final HashMap<String, ImageEntry> itemImages = scraper.getImages();
+                    final HashMap<MultiProtocolURI, ImageEntry> itemImages = scraper.getImages();
                    if (itemImages != null && !itemImages.isEmpty()) {
                        ContentScraper.addAllImages(images, itemImages);
                    }
--- a/source/net/yacy/document/parser/rtfParser.java
+++ b/source/net/yacy/document/parser/rtfParser.java
@ -34,11 +34,11 @@ import java.util.Set;
 import javax.swing.text.DefaultStyledDocument;
 import javax.swing.text.rtf.RTFEditorKit;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;


 public class rtfParser extends AbstractParser implements Idiom {
@ -62,7 +62,7 @@ public class rtfParser extends AbstractParser implements Idiom {
 		super("Rich Text Format Parser");  
 	}

-	public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+	public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {

        
 		try {	
--- a/source/net/yacy/document/parser/sevenzipParser.java
+++ b/source/net/yacy/document/parser/sevenzipParser.java
@ -36,12 +36,12 @@ import java.io.OutputStream;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.FileUtils;

@ -69,7 +69,7 @@ public class sevenzipParser extends AbstractParser implements Idiom {
        super("7zip Archive Parser");
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final IInStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final IInStream source) throws ParserException, InterruptedException {
        final Document doc = new Document(location, mimeType, charset, null, null, null, null, null, null, null, (Object)null, null, null, false);
        Handler archive;
        super.theLogger.logFine("opening 7zip archive...");
@ -99,13 +99,13 @@ public class sevenzipParser extends AbstractParser implements Idiom {
    }
    
    @Override
-    public Document parse(final DigestURI location, final String mimeType, final String charset,
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset,
            final byte[] source) throws ParserException, InterruptedException {
        return parse(location, mimeType, charset, new ByteArrayIInStream(source));
    }
    
    @Override
-    public Document parse(final DigestURI location, final String mimeType, final String charset,
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset,
            final File sourceFile) throws ParserException, InterruptedException {
        try {
            return parse(location, mimeType, charset, new MyRandomAccessFile(sourceFile, "r"));
@ -114,7 +114,7 @@ public class sevenzipParser extends AbstractParser implements Idiom {
        }
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset,
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset,
            final InputStream source) throws ParserException, InterruptedException {
        try {
            final ByteArrayOutputStream cfos = new ByteArrayOutputStream();
@ -189,7 +189,7 @@ public class sevenzipParser extends AbstractParser implements Idiom {
                     Document theDoc;
                     // workaround for relative links in file, normally '#' shall be used behind the location, see
                     // below for reversion of the effects
-                     final DigestURI url = DigestURI.newURL(doc.dc_source(), this.prefix + "/" + super.filePath);
+                     final MultiProtocolURI url = MultiProtocolURI.newURL(doc.dc_source(), this.prefix + "/" + super.filePath);
                     final String mime = TextParser.mimeOf(super.filePath.substring(super.filePath.lastIndexOf('.') + 1));
                     theDoc = TextParser.parseSource(url, mime, null, this.cfos.toByteArray());
                     
--- a/source/net/yacy/document/parser/swfParser.java
+++ b/source/net/yacy/document/parser/swfParser.java
@ -33,11 +33,11 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;

 import pt.tumba.parser.swf.SWF2HTML;
@ -74,7 +74,7 @@ public class swfParser extends AbstractParser implements Idiom {
     * parses the source documents and returns a plasmaParserDocument containing
     * all extracted information about the parsed document
     */
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {

        try {
            final SWF2HTML swf2html = new SWF2HTML();
@ -97,7 +97,7 @@ public class swfParser extends AbstractParser implements Idiom {
            final String[] sections =  null;
            final String abstrct = null;
            //TreeSet images = null;
-            final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
+            final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
            int urls = 0;
            int urlStart = -1;
            int urlEnd = 0;
@ -114,7 +114,7 @@ public class swfParser extends AbstractParser implements Idiom {
                urlEnd = contents.indexOf(linebreak,urlStart);
                url = contents.substring(urlStart,urlEnd);
                urlnr = (Integer.valueOf(++urls)).toString();
-                anchors.put(new DigestURI(url, null), urlnr);
+                anchors.put(new MultiProtocolURI(url), urlnr);
                contents = contents.substring(0,urlStart)+contents.substring(urlEnd);
            }

--- a/source/net/yacy/document/parser/tarParser.java
+++ b/source/net/yacy/document/parser/tarParser.java
@ -38,6 +38,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.zip.GZIPInputStream;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
@ -45,7 +46,6 @@ import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.ByteBuffer;
 import net.yacy.kelondro.util.FileUtils;

@ -81,7 +81,7 @@ public class tarParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, InputStream source) throws ParserException, InterruptedException {
        
        long docTextLength = 0;
        OutputStream docText = null;
@ -106,8 +106,8 @@ public class tarParser extends AbstractParser implements Idiom {
            final LinkedList<String> docSections = new LinkedList<String>();
            final StringBuilder docAbstrct = new StringBuilder();

-            final Map<DigestURI, String> docAnchors = new HashMap<DigestURI, String>();
-            final HashMap<String, ImageEntry> docImages = new HashMap<String, ImageEntry>(); 
+            final Map<MultiProtocolURI, String> docAnchors = new HashMap<MultiProtocolURI, String>();
+            final HashMap<MultiProtocolURI, ImageEntry> docImages = new HashMap<MultiProtocolURI, ImageEntry>(); 
                        
            // looping through the contained files
            TarEntry entry;
@ -143,7 +143,7 @@ public class tarParser extends AbstractParser implements Idiom {
                    checkInterruption();
                    
                    // parsing the content                    
-                    subDoc = TextParser.parseSource(DigestURI.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
+                    subDoc = TextParser.parseSource(MultiProtocolURI.newURL(location,"#" + entryName),entryMime,null,subDocTempFile);
                } catch (final ParserException e) {
                    this.theLogger.logInfo("Unable to parse tar file entry '" + entryName + "'. " + e.getMessage());
                } finally {
--- a/source/net/yacy/document/parser/torrentParser.java
+++ b/source/net/yacy/document/parser/torrentParser.java
@ -36,12 +36,12 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.util.BDecoder;
 import net.yacy.kelondro.util.FileUtils;
@ -75,7 +75,7 @@ public class torrentParser extends AbstractParser implements Idiom {
    }
    
    @Override
-    public Document parse(DigestURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
+    public Document parse(MultiProtocolURI location, String mimeType, String charset, InputStream source) throws ParserException, InterruptedException {
        byte[] b = null;
        try {
            b = FileUtils.read(source);
@ -141,7 +141,7 @@ public class torrentParser extends AbstractParser implements Idiom {
        try {
            byte[] b = FileUtils.read(new File(args[0]));
            torrentParser parser = new torrentParser();
-            Document d = parser.parse(new DigestURI("http://localhost/test.torrent", null), null, "utf-8", b);
+            Document d = parser.parse(new MultiProtocolURI("http://localhost/test.torrent"), null, "utf-8", b);
            Condenser c = new Condenser(d, true, true);
            Map<String, Word> w = c.words();
            for (Map.Entry<String, Word> e: w.entrySet()) System.out.println("Word: " + e.getKey() + " - " + e.getValue().posInText);
--- a/source/net/yacy/document/parser/vcfParser.java
+++ b/source/net/yacy/document/parser/vcfParser.java
@ -37,11 +37,11 @@ import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.order.Base64Order;

 /**
@ -80,13 +80,13 @@ public class vcfParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI url, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI url, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        try {
            final StringBuilder parsedTitle = new StringBuilder();
            final StringBuilder parsedDataText = new StringBuilder();
            final HashMap<String, String> parsedData = new HashMap<String, String>();
-            final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
+            final HashMap<MultiProtocolURI, String> anchors = new HashMap<MultiProtocolURI, String>();
            final LinkedList<String> parsedNames = new LinkedList<String>();
            
            boolean useLastLine = false;
@ -195,7 +195,7 @@ public class vcfParser extends AbstractParser implements Idiom {
                        parsedData.clear();
                    } else if (key.toUpperCase().startsWith("URL")) {
                        try {
-                            final DigestURI newURL = new DigestURI(value, null);
+                            final MultiProtocolURI newURL = new MultiProtocolURI(value);
                            anchors.put(newURL, newURL.toString());   
                            //parsedData.put(key,value);
                        } catch (final MalformedURLException ex) {/* ignore this */}                                                
--- a/source/net/yacy/document/parser/vsdParser.java
+++ b/source/net/yacy/document/parser/vsdParser.java
@ -31,11 +31,11 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;

 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
@ -82,7 +82,7 @@ public class vsdParser extends AbstractParser implements Idiom {
     * parses the source documents and returns a plasmaParserDocument containing
     * all extracted information about the parsed document
     */
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {

    	Document theDoc = null;
    	
--- a/source/net/yacy/document/parser/xlsParser.java
+++ b/source/net/yacy/document/parser/xlsParser.java
@ -31,11 +31,11 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
 import net.yacy.document.ParserException;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.logging.Log;

 import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
@ -76,7 +76,7 @@ public class xlsParser extends AbstractParser implements Idiom {
     * parses the source documents and returns a plasmaParserDocument containing
     * all extracted information about the parsed document
     */ 
-    public Document parse(final DigestURI location, final String mimeType,
+    public Document parse(final MultiProtocolURI location, final String mimeType,
            final String charset, final InputStream source) throws ParserException,
            InterruptedException {
        return new XLSHSSFListener().parse(location, mimeType, charset, source);
@ -111,7 +111,7 @@ public class xlsParser extends AbstractParser implements Idiom {
         * parses the source documents and returns a Document containing
         * all extracted information about the parsed document
         */ 
-        public Document parse(final DigestURI location, final String mimeType,
+        public Document parse(final MultiProtocolURI location, final String mimeType,
                final String charset, final InputStream source) throws ParserException,
                InterruptedException {
            try {
--- a/source/net/yacy/document/parser/zipParser.java
+++ b/source/net/yacy/document/parser/zipParser.java
@ -39,6 +39,7 @@ import java.util.Set;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.document.AbstractParser;
 import net.yacy.document.Document;
 import net.yacy.document.Idiom;
@ -46,7 +47,6 @@ import net.yacy.document.TextParser;
 import net.yacy.document.ParserException;
 import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
-import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.util.ByteBuffer;
 import net.yacy.kelondro.util.FileUtils;

@ -82,7 +82,7 @@ public class zipParser extends AbstractParser implements Idiom {
        return SUPPORTED_EXTENSIONS;
    }
    
-    public Document parse(final DigestURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
+    public Document parse(final MultiProtocolURI location, final String mimeType, final String charset, final InputStream source) throws ParserException, InterruptedException {
        
        long docTextLength = 0;
        OutputStream docText = null;
@ -95,8 +95,8 @@ public class zipParser extends AbstractParser implements Idiom {
            final StringBuilder docLongTitle = new StringBuilder();   
            final LinkedList<String> docSections = new LinkedList<String>();
            final StringBuilder docAbstrct = new StringBuilder();
-            final Map<DigestURI, String> docAnchors = new HashMap<DigestURI, String>();
-            final HashMap<String, ImageEntry> docImages = new HashMap<String, ImageEntry>();
+            final Map<MultiProtocolURI, String> docAnchors = new HashMap<MultiProtocolURI, String>();
+            final HashMap<MultiProtocolURI, ImageEntry> docImages = new HashMap<MultiProtocolURI, ImageEntry>();
            
            // looping through the contained files
            ZipEntry entry;
@ -129,7 +129,7 @@ public class zipParser extends AbstractParser implements Idiom {
                    FileUtils.copy(zippedContent,subDocTempFile,entry.getSize());                    
                    
                    // parsing the zip file entry
-                    subDoc = TextParser.parseSource(DigestURI.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
+                    subDoc = TextParser.parseSource(MultiProtocolURI.newURL(location,"#" + entryName),entryMime,null, subDocTempFile);
                } catch (final ParserException e) {
                    this.theLogger.logInfo("Unable to parse zip file entry '" + entryName + "'. " + e.getMessage());
                } finally {
--- a/source/net/yacy/kelondro/data/meta/DigestURI.java
+++ b/source/net/yacy/kelondro/data/meta/DigestURI.java
--- a/source/net/yacy/kelondro/util/Domains.java
+++ b/source/net/yacy/kelondro/util/Domains.java
@ -595,7 +595,7 @@ public class Domains {
    }
     
    public static boolean isLocal(final String host) {
-        assert (host != null);
+        if (host == null) return true;

        // FIXME IPv4 only
        // check local ip addresses
--- a/source/net/yacy/repository/Blacklist.java
+++ b/source/net/yacy/repository/Blacklist.java
@ -283,7 +283,7 @@ public class Blacklist {
    }

    public boolean isListed(final String blacklistType, final DigestURI url) {
-
+        if (url.getHost() == null) return false;
        final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);        
        if (!urlHashCache.has(url.hash())) {
            final boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@ -51,6 +51,7 @@ import net.yacy.kelondro.util.FileUtils;

 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.retrieval.FTPLoader;
+import de.anomic.crawler.retrieval.FileLoader;
 import de.anomic.crawler.retrieval.HTTPLoader;
 import de.anomic.crawler.retrieval.Request;
 import de.anomic.crawler.retrieval.Response;
@ -73,17 +74,19 @@ public final class LoaderDispatcher {
    private final HTTPLoader httpLoader;
    private final FTPLoader ftpLoader;
    private final SMBLoader smbLoader;
+    private final FileLoader fileLoader;
    private final Log log;
    
    public LoaderDispatcher(final Switchboard sb) {
        this.sb = sb;
-        this.supportedProtocols = new HashSet<String>(Arrays.asList(new String[]{"http","https","ftp","smb"}));
+        this.supportedProtocols = new HashSet<String>(Arrays.asList(new String[]{"http","https","ftp","smb","file"}));
        
        // initiate loader objects
        this.log = new Log("LOADER");
        httpLoader = new HTTPLoader(sb, log);
        ftpLoader = new FTPLoader(sb, log);
        smbLoader = new SMBLoader(sb, log);
+        fileLoader = new FileLoader(sb, log);
    }
    
    public boolean isSupportedProtocol(final String protocol) {
@ -251,13 +254,14 @@ public final class LoaderDispatcher {
        }

        // now it's for sure that we will access the target. Remember the access time
-        accessTime.put(host, System.currentTimeMillis());
+        if (host != null) accessTime.put(host, System.currentTimeMillis());
        
        // load resource from the internet
        Response response = null;
        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, acceptOnlyParseable, maxFileSize);
        if (protocol.equals("ftp")) response = ftpLoader.load(request, true);
        if (protocol.equals("smb")) response = smbLoader.load(request, true);
+        if (protocol.equals("file")) response = fileLoader.load(request, true);
        if (response != null) {
            // we got something. Now check if we want to store that to the cache
            // first check looks if we want to store the content to the cache
--- a/test/de/anomic/yacy/yacyURLTest.java
+++ b/test/de/anomic/yacy/yacyURLTest.java
@ -2,13 +2,14 @@ package de.anomic.yacy;

 import java.net.MalformedURLException;

+import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.kelondro.data.meta.DigestURI;

 import junit.framework.TestCase;

 public class yacyURLTest extends TestCase {

-	public void testResolveBackpath() throws MalformedURLException {
+	public void testResolveBackpath() {
 		String[][] testStrings = new String[][] {
 				new String[]{"/..home","/..home"},
 				new String[]{"/test/..home/test.html","/test/..home/test.html"},
@ -23,14 +24,13 @@ public class yacyURLTest extends TestCase {
 				new String[]{"/home/..test/../hallo/../","/home/"}
 		};		
 		
-		DigestURI urlObj = new DigestURI("http://yacy.net");
 		for (int i=0; i < testStrings.length; i++) {
 			// desired conversion result
 			System.out.print("testResolveBackpath: " + testStrings[i][0]);
 			String shouldBe = testStrings[i][1];
 			
 			// conversion result
-			String resolvedURL = urlObj.resolveBackpath(testStrings[i][0]);
+			String resolvedURL = MultiProtocolURI.resolveBackpath(testStrings[i][0]);
 			
 			// test if equal
 			assertEquals(shouldBe,resolvedURL);