merge rc1/master

This commit is contained in:
reger 2013-11-07 21:30:17 +01:00
parent 082c9a98c1
commit 1437c45383
40 changed files with 524 additions and 472 deletions

View File

@ -461,19 +461,21 @@
and old cache.
-->
<filterCache class="solr.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
size="64"
initialSize="64"
autowarmCount="4"
cleanupThread="true"/>
<!-- Query Result Cache
Caches results of searches - ordered lists of document ids
(DocList) based on a query, a sort, and the range of documents requested.
-->
<queryResultCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<queryResultCache class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="4"
cleanupThread="true"/>
<!-- Document Cache
@ -481,10 +483,11 @@
document). Since Lucene internal document ids are transient,
this cache will not be autowarmed.
-->
<documentCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<documentCache class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="4"
cleanupThread="true"/>
<!-- Field Value Cache
@ -494,9 +497,10 @@
-->
<!--
<fieldValueCache class="solr.FastLRUCache"
size="512"
size="64"
autowarmCount="128"
showItems="32" />
showItems="32"
cleanupThread="true"/>
-->
<!-- Custom Cache
@ -510,11 +514,12 @@
-->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
class="solr.FastLRUCache"
size="64"
initialSize="64"
autowarmCount="64"
regenerator="com.mycompany.MyRegenerator"
cleanupThread="true"
/>
-->
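All four caches above move from solr.LRUCache to solr.FastLRUCache, shrink from 512 to 64 entries, gain a small autowarm count, and enable a cleanup thread. As a rough sketch of what Solr does with one such entry (Solr 4.x API; the init call with null persistence/regenerator is an assumption for illustration, not YaCy code):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.solr.search.FastLRUCache;

    public class CacheConfigSketch {
        public static void main(String[] args) {
            // Solr copies the XML attributes into an args map for SolrCache.init
            Map<String, String> cfg = new HashMap<String, String>();
            cfg.put("size", "64");
            cfg.put("initialSize", "64");
            cfg.put("autowarmCount", "4");
            cfg.put("cleanupThread", "true"); // evict on a background thread, not on the putting thread
            FastLRUCache<Object, Object> filterCache = new FastLRUCache<Object, Object>();
            filterCache.init(cfg, null, null);
        }
    }

The smaller sizes trade hit rate for a much lower memory ceiling, in line with the cache-clearing work elsewhere in this commit.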

View File

@ -797,11 +797,6 @@ search.excludehosth=
# the cases of nocache, iffresh and ifexist cause an index deletion
search.verify.delete = true
# images may be treated either as documents that are shown in search results or as objects
# that are only visible in special search environments, like image search
search.excludeintext.image = true
crawler.load.image = true
# remote search details
remotesearch.maxcount = 10
remotesearch.maxtime = 3000

View File

@ -19,7 +19,7 @@
<dt><label for="HTCachePath">The path where the cache is stored</label></dt>
<dd><input name="HTCachePath" id="HTCachePath" type="text" size="20" maxlength="300" value="#[HTCachePath]#" /></dd>
<dt><label for="actualCacheSize">The current size of the cache</label></dt>
<dd><span id="actualCacheSize">#[actualCacheSize]# MB</span></dd>
<dd><span id="actualCacheSize">#[actualCacheSize]# MB for #[actualCacheDocCount]# files, #[docSizeAverage]# KB / file in average </span></dd>
<dt><label for="maxCacheSize">The maximum size of the cache</label></dt>
<dd><input name="maxCacheSize" id="maxCacheSize" type="text" size="8" maxlength="24" value="#[maxCacheSize]#" /> MB</dd>
<dt>&nbsp;</dt>

View File

@ -77,7 +77,9 @@ public class ConfigHTCache_p {
}
prop.put("HTCachePath", env.getConfig(SwitchboardConstants.HTCACHE_PATH, SwitchboardConstants.HTCACHE_PATH_DEFAULT));
prop.put("actualCacheSize", (Cache.getActualCacheSize() / 1024 / 1024));
prop.put("actualCacheSize", Cache.getActualCacheSize() / 1024 / 1024);
prop.put("actualCacheDocCount", Cache.getActualCacheDocCount());
prop.put("docSizeAverage", Cache.getActualCacheSize() / Cache.getActualCacheDocCount() / 1024);
prop.put("maxCacheSize", env.getConfigLong(SwitchboardConstants.PROXY_CACHE_SIZE, 64));
// return rewrite properties
return prop;
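Note on the averaging line: Cache.getActualCacheDocCount() is zero for an empty cache, so the division can throw. A guarded variant (hedged sketch, not the committed code):

    long docCount = Cache.getActualCacheDocCount();
    prop.put("actualCacheDocCount", docCount);
    // guard against division by zero while the cache is still empty
    prop.put("docSizeAverage", docCount == 0 ? 0 : Cache.getActualCacheSize() / docCount / 1024);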

View File

@ -34,7 +34,7 @@ public class ContentAnalysis_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
if (post != null && post.containsKey("EnterDoublecheck")) {
Ranking.setMinTokenLen(post.getInt("minTokenLen", 3));

View File

@ -553,7 +553,6 @@ public class HostBrowser {
}
} catch (final IOException e) {
}
}
this.references_external = (rc_external == null || rc_external.intValue() <= 0) ? 0 : rc_external.intValue();
this.references_exthosts = (rc_exthosts == null || rc_exthosts.intValue() <= 0) ? 0 : rc_exthosts.intValue();
@ -562,7 +561,7 @@ public class HostBrowser {
StringBuilder sbi = new StringBuilder();
int c = 0;
for (String s: references_internal_urls) {
sbi.append("<a href='").append("/HostBrowser.html?path=" + s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
sbi.append("<a href='").append(s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
c++;
if (c % 80 == 0) sbi.append("<br/>");
}
@ -570,7 +569,7 @@ public class HostBrowser {
StringBuilder sbe = new StringBuilder();
c = 0;
for (String s: references_external_urls) {
sbe.append("<a href='").append("/HostBrowser.html?path=" + s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
sbe.append("<a href='").append(s).append("' target='_blank'><img src='env/grafics/i16.gif' alt='info' title='" + s + "' width='12' height='12'/></a>");
c++;
if (c % 80 == 0) sbe.append("<br/>");
}

View File

@ -193,6 +193,9 @@ function updatepage(str) {
<dt class="TableCellDark">URL Filter</dt>
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">query</dt>
<dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">Export Format</dt>
<dd>Only Domain:
<input type="radio" name="format" value="dom-text" />Plain Text List (domains only)&nbsp;&nbsp;

View File

@ -261,7 +261,8 @@ public class IndexControlURLs_p {
final File f = new File(s);
f.getParentFile().mkdirs();
final String filter = post.get("exportfilter", ".*");
final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
final String query = post.get("exportquery", "*:*");
final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
prop.put("lurlexport_exportfile", s);
prop.put("lurlexport_urlcount", running.count());

View File

@ -38,7 +38,7 @@ public class RankingSolr_p {
// clean up all search events
SearchEventCache.cleanupEvents(true);
sb.index.clearCache(); // every time the ranking is changed we need to remove old orderings
sb.index.clearCaches(); // every time the ranking is changed we need to remove old orderings
int profileNr = 0;
if (post != null) profileNr = post.getInt("profileNr", profileNr);

View File

@ -360,7 +360,7 @@ public class yacysearch {
// check available memory and clean up if necessary
if ( !MemoryControl.request(8000000L, false) ) {
indexSegment.clearCache();
indexSegment.clearCaches();
SearchEventCache.cleanupEvents(false);
}

View File

@ -57,6 +57,7 @@ import net.yacy.cora.protocol.TimeoutRequest;
import net.yacy.cora.protocol.ftp.FTPClient;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.CommonPattern;
import net.yacy.document.parser.html.CharacterCoding;
/**
* MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
@ -66,7 +67,6 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
private static final Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 5000;
@ -636,7 +636,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
} else {
this.searchpart = this.path.substring(r + 1);
// strip &amp;
Matcher matcher = ampPattern.matcher(this.searchpart);
Matcher matcher = CharacterCoding.ampPattern.matcher(this.searchpart);
while (matcher.find()) {
this.searchpart = matcher.replaceAll("&");
matcher.reset(this.searchpart);

View File

@ -21,7 +21,6 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -235,7 +234,7 @@ public abstract class AbstractSolrConnector implements SolrConnector {
* @return the subset of the given ids which exist in the index
* @throws IOException
*/
public Set<String> existsByIds(Collection<String> ids) throws IOException {
public Set<String> existsByIds(Set<String> ids) throws IOException {
if (ids == null || ids.size() == 0) return new HashSet<String>();
// construct raw query
final SolrQuery params = new SolrQuery();

View File

@ -61,7 +61,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
this.missCache = new ConcurrentARC<String, Object>(missCacheMax, partitions);
}
public void clearCache() {
public void clearCaches() {
this.hitCache.clear();
this.missCache.clear();
this.documentCache.clear();
@ -70,9 +70,9 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public synchronized void close() {
this.clearCaches();
if (this.solr != null) this.solr.close();
this.solr = null;
this.clearCache();
}
/**
@ -81,7 +81,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
*/
@Override
public void clear() throws IOException {
this.clearCache();
this.clearCaches();
if (this.solr != null) this.solr.clear();
}
@ -119,7 +119,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
@Override
public void deleteByQuery(final String querystring) throws IOException {
this.clearCache();
this.clearCaches();
this.solr.deleteByQuery(querystring);
}
@ -261,7 +261,7 @@ public class CachedSolrConnector extends AbstractSolrConnector implements SolrCo
}
private void addToCache(SolrDocumentList list, boolean doccache) {
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
for (final SolrDocument solrdoc: list) {
addToCache(solrdoc, doccache);
}

View File

@ -118,6 +118,12 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
ensureAliveUpdateHandler();
}
@Override
public void clearCaches() {
this.connector.clearCaches();
this.idCache.clear();
}
/**
* used for debugging
*/
@ -326,10 +332,11 @@ public class ConcurrentUpdateSolrConnector implements SolrConnector {
}
@Override
public Set<String> existsByIds(Collection<String> ids) throws IOException {
public Set<String> existsByIds(Set<String> ids) throws IOException {
HashSet<String> e = new HashSet<String>();
if (ids == null || ids.size() == 0) return e;
Collection<String> idsC = new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : e;
Set<String> idsC = new HashSet<String>();
for (String id: ids) {
if (this.idCache.has(ASCII.getBytes(id))) {cacheSuccessSign(); e.add(id); continue;}
if (existIdFromDeleteQueue(id)) {cacheSuccessSign(); continue;}

View File

@ -22,7 +22,6 @@
package net.yacy.cora.federate.solr.connector;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
@ -35,6 +34,7 @@ import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
@ -48,10 +48,14 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.UnInvertedField;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QueryResultKey;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
@ -89,6 +93,22 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
super.init(this.instance.getServer(coreName));
}
public void clearCaches() {
SolrConfig solrConfig = this.core.getSolrConfig();
@SuppressWarnings("unchecked")
SolrCache<String, UnInvertedField> fieldValueCache = solrConfig.fieldValueCacheConfig == null ? null : solrConfig.fieldValueCacheConfig.newInstance();
if (fieldValueCache != null) fieldValueCache.clear();
@SuppressWarnings("unchecked")
SolrCache<Query, DocSet> filterCache = solrConfig.filterCacheConfig == null ? null : solrConfig.filterCacheConfig.newInstance();
if (filterCache != null) filterCache.clear();
@SuppressWarnings("unchecked")
SolrCache<QueryResultKey, DocList> queryResultCache = solrConfig.queryResultCacheConfig == null ? null : solrConfig.queryResultCacheConfig.newInstance();
if (queryResultCache != null) queryResultCache.clear();
@SuppressWarnings("unchecked")
SolrCache<Integer, Document> documentCache = solrConfig.documentCacheConfig == null ? null : solrConfig.documentCacheConfig.newInstance();
if (documentCache != null) documentCache.clear();
}
public SolrInstance getInstance() {
return this.instance;
}
@ -224,9 +244,9 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
@Override
public Set<String> existsByIds(Collection<String> ids) {
public Set<String> existsByIds(Set<String> ids) {
if (ids == null || ids.size() == 0) return new HashSet<String>();
if (ids.size() == 1 && ids instanceof Set) return existsById(ids.iterator().next()) ? (Set<String>) ids : new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
StringBuilder sb = new StringBuilder(); // construct something like "({!raw f=id}Ij7B63g-gSHA) OR ({!raw f=id}PBcGI3g-gSHA)"
for (String id: ids) {
sb.append("({!raw f=").append(CollectionSchema.id.getSolrFieldName()).append('}').append(id).append(") OR ");

View File

@ -53,6 +53,12 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
this.solr0 = solr0;
this.solr1 = solr1;
}
@Override
public void clearCaches() {
if (this.solr0 != null) this.solr0.clearCaches();
if (this.solr1 != null) this.solr1.clearCaches();
}
public boolean isConnected0() {
return this.solr0 != null;
@ -347,7 +353,9 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
}
@Override
public Set<String> existsByIds(Collection<String> ids) throws IOException {
public Set<String> existsByIds(Set<String> ids) throws IOException {
if (ids == null || ids.size() == 0) return new HashSet<String>();
if (ids.size() == 1) return existsById(ids.iterator().next()) ? ids : new HashSet<String>();
if (this.solr0 != null && this.solr1 == null) return this.solr0.existsByIds(ids);
if (this.solr0 == null && this.solr1 != null) return this.solr1.existsByIds(ids);
Set<String> s = new HashSet<String>();

View File

@ -71,6 +71,11 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
super.close();
}
@Override
public void clearCaches() {
// we do not have direct access to the caches here, thus we simply do nothing.
}
@Override
public QueryResponse getResponseByParams(ModifiableSolrParams params) throws IOException {
// during the solr query we set the thread name to the query string to get more debugging info in thread dumps
@ -134,4 +139,5 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
}
System.exit(0);
}
}

View File

@ -36,7 +36,12 @@ import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {
/**
* clear all caches: inside solr and outside solr, within the implementations of this interface
*/
public void clearCaches();
/**
* get the size of the index
* @return number of results if solr is queried with a catch-all pattern
@ -106,7 +111,7 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
* @return the subset of the given ids which exist in the index
* @throws IOException
*/
public Set<String> existsByIds(Collection<String> ids) throws IOException;
public Set<String> existsByIds(Set<String> ids) throws IOException;
/**
* check if a given document exists in solr
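Taken together, the interface now guarantees a cache-clearing hook and a Set-based existence check. A caller-side sketch ('connector' and 'urlhash' are placeholders; the memory-pressure pattern mirrors Switchboard and Fulltext below):

    // build a single-id set; most implementations short-circuit this to existsById
    Set<String> ids = new HashSet<String>(1);
    ids.add(ASCII.String(urlhash));
    Set<String> existing = connector.existsByIds(ids);
    // the new hook is safe everywhere: RemoteSolrConnector implements it as a no-op
    if (MemoryControl.shortStatus()) connector.clearCaches();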

View File

@ -64,7 +64,7 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
public SolrServer getServer() {
return this.server;
}
@Override
public void commit(final boolean softCommit) {
synchronized (this.server) {

View File

@ -24,7 +24,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.federate.solr.connector.CachedSolrConnector;
import net.yacy.cora.federate.solr.connector.ConcurrentUpdateSolrConnector;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.connector.MirrorSolrConnector;
@ -161,9 +160,9 @@ public class InstanceMirror {
return msc;
}
public void clearCache() {
public void clearCaches() {
for (SolrConnector csc: this.connectorCache.values()) {
if (csc instanceof CachedSolrConnector) ((CachedSolrConnector) csc).clearCache();
csc.clearCaches();
}
for (EmbeddedSolrConnector ssc: this.embeddedCache.values()) ssc.commit(true);
}

View File

@ -1,195 +1,193 @@
/**
* HTMLResponseWriter
* Copyright 2013 by Michael Peter Christen
* First released 09.06.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.responsewriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
public class HTMLResponseWriter implements QueryResponseWriter {
private static final Set<String> DEFAULT_FIELD_LIST = null;
private static final Pattern dqp = Pattern.compile("\"");
public HTMLResponseWriter() {
super();
}
@Override
public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
return "text/html";
}
@Override
public void init(@SuppressWarnings("rawtypes") NamedList n) {
}
@Override
public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
NamedList<?> values = rsp.getValues();
assert values.get("responseHeader") != null;
assert values.get("response") != null;
writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
//writer.write("<!--\n");
//writer.write("this is a XHTML+RDFa file. It contains RDF annotations with dublin core properties\n");
//writer.write("you can validate it with http://validator.w3.org/\n");
//writer.write("-->\n");
writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\"\n");
writer.write(" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
writer.write(" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
writer.write(" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n");
writer.write("<head profile=\"http://www.w3.org/2003/g/data-view\">\n");
//writer.write("<link rel=\"transformation\" href=\"http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl\"/>\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");
NamedList<Object> paramsList = request.getOriginalParams().toNamedList();
paramsList.remove("wt");
String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</div>\n");
DocList response = ((ResultContext) values.get("response")).docs;
final int sz = response.size();
if (sz > 0) {
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
IndexSchema schema = request.getSchema();
int id = iterator.nextDoc();
Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
if (sz == 1) {
writer.write("<title>" + title + "</title>\n</head><body>\n");
} else {
writer.write("<title>Document List</title>\n</head><body>\n");
}
writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</span></div>\n");
writeDoc(writer, tdoc, title);
while (iterator.hasNext()) {
id = iterator.nextDoc();
doc = searcher.doc(id, DEFAULT_FIELD_LIST);
tdoc = translateDoc(schema, doc);
title = tdoc.get(CollectionSchema.title.getSolrFieldName());
writeDoc(writer, tdoc, title);
}
} else {
writer.write("<title>No Document Found</title>\n</head><body>\n");
}
writer.write("</body></html>\n");
}
private static final void writeDoc(Writer writer, LinkedHashMap<String, String> tdoc, String title) throws IOException {
writer.write("<form name=\"yacydoc" + title + "\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
writer.write("<fieldset>\n");
writer.write("<h1 property=\"dc:Title\">" + title + "</h1>\n");
writer.write("<dl>\n");
for (Map.Entry<String, String> entry: tdoc.entrySet()) {
writer.write("<dt>");
writer.write(entry.getKey());
writer.write("</dt><dd>");
XML.escapeAttributeValue(entry.getValue(), writer);
writer.write("</dd>\n");
}
writer.write("</dl>\n");
writer.write("</fieldset>\n");
writer.write("</form>\n");
}
static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) {
List<IndexableField> fields = doc.getFields();
int sz = fields.size();
int fidx1 = 0, fidx2 = 0;
LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
while (fidx1 < sz) {
IndexableField value = fields.get(fidx1);
String fieldName = value.name();
fidx2 = fidx1 + 1;
while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) {
fidx2++;
}
SchemaField sf = schema.getFieldOrNull(fieldName);
if (sf == null) sf = new SchemaField(fieldName, new TextField());
FieldType type = sf.getType();
if (fidx1 + 1 == fidx2) {
if (sf.multiValued()) {
String sv = value.stringValue();
kv.put(fieldName, field2string(type, sv));
} else {
kv.put(fieldName, field2string(type, value.stringValue()));
}
} else {
for (int i = fidx1; i < fidx2; i++) {
String sv = fields.get(i).stringValue();
kv.put(fieldName + "_" + i, field2string(type, sv));
}
}
fidx1 = fidx2;
}
return kv;
}
@SuppressWarnings("deprecation")
private static String field2string(final FieldType type, final String value) {
String typeName = type.getTypeName();
if (typeName.equals(SolrType.bool.printName())) {
return "F".equals(value) ? "false" : "true";
} else if (typeName.equals(SolrType.date.printName())) {
return org.apache.solr.schema.DateField.formatExternal(new Date(Long.parseLong(value))); // this is declared deprecated in solr 4.2.1 but is still used as done here
}
return value;
}
// XML.escapeCharData(val, writer);
}
/**
* HTMLResponseWriter
* Copyright 2013 by Michael Peter Christen
* First released 09.06.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.responsewriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
public class HTMLResponseWriter implements QueryResponseWriter {
private static final Set<String> DEFAULT_FIELD_LIST = null;
private static final Pattern dqp = Pattern.compile("\"");
public HTMLResponseWriter() {
super();
}
@Override
public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
return "text/html";
}
@Override
public void init(@SuppressWarnings("rawtypes") NamedList n) {
}
@Override
public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
NamedList<?> values = rsp.getValues();
assert values.get("responseHeader") != null;
assert values.get("response") != null;
writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
//writer.write("<!--\n");
//writer.write("this is a XHTML+RDFa file. It contains RDF annotations with dublin core properties\n");
//writer.write("you can validate it with http://validator.w3.org/\n");
//writer.write("-->\n");
writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\"\n");
writer.write(" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
writer.write(" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
writer.write(" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n");
writer.write("<head profile=\"http://www.w3.org/2003/g/data-view\">\n");
//writer.write("<link rel=\"transformation\" href=\"http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl\"/>\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");
NamedList<Object> paramsList = request.getOriginalParams().toNamedList();
paramsList.remove("wt");
String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
DocList response = ((ResultContext) values.get("response")).docs;
final int sz = response.size();
if (sz > 0) {
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
IndexSchema schema = request.getSchema();
int id = iterator.nextDoc();
Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
if (sz == 1) {
writer.write("<title>" + title + "</title>\n</head><body>\n");
} else {
writer.write("<title>Document List</title>\n</head><body>\n");
}
writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see this page as XML.</span></div>\n");
writeDoc(writer, tdoc, title);
while (iterator.hasNext()) {
id = iterator.nextDoc();
doc = searcher.doc(id, DEFAULT_FIELD_LIST);
tdoc = translateDoc(schema, doc);
title = tdoc.get(CollectionSchema.title.getSolrFieldName());
writeDoc(writer, tdoc, title);
}
} else {
writer.write("<title>No Document Found</title>\n</head><body>\n");
}
writer.write("</body></html>\n");
}
private static final void writeDoc(Writer writer, LinkedHashMap<String, String> tdoc, String title) throws IOException {
writer.write("<form name=\"yacydoc" + title + "\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
writer.write("<fieldset>\n");
writer.write("<h1 property=\"dc:Title\">" + title + "</h1>\n");
writer.write("<dl>\n");
for (Map.Entry<String, String> entry: tdoc.entrySet()) {
writer.write("<dt>");
writer.write(entry.getKey());
writer.write("</dt><dd>");
XML.escapeAttributeValue(entry.getValue(), writer);
writer.write("</dd>\n");
}
writer.write("</dl>\n");
writer.write("</fieldset>\n");
writer.write("</form>\n");
}
static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) {
List<IndexableField> fields = doc.getFields();
int sz = fields.size();
int fidx1 = 0, fidx2 = 0;
LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
while (fidx1 < sz) {
IndexableField value = fields.get(fidx1);
String fieldName = value.name();
fidx2 = fidx1 + 1;
while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) {
fidx2++;
}
SchemaField sf = schema.getFieldOrNull(fieldName);
if (sf == null) sf = new SchemaField(fieldName, new TextField());
FieldType type = sf.getType();
if (fidx1 + 1 == fidx2) {
if (sf.multiValued()) {
String sv = value.stringValue();
kv.put(fieldName, field2string(type, sv));
} else {
kv.put(fieldName, field2string(type, value.stringValue()));
}
} else {
for (int i = fidx1; i < fidx2; i++) {
String sv = fields.get(i).stringValue();
kv.put(fieldName + "_" + i, field2string(type, sv));
}
}
fidx1 = fidx2;
}
return kv;
}
@SuppressWarnings("deprecation")
private static String field2string(final FieldType type, final String value) {
String typeName = type.getTypeName();
if (typeName.equals(SolrType.bool.printName())) {
return "F".equals(value) ? "false" : "true";
} else if (typeName.equals(SolrType.date.printName())) {
return org.apache.solr.schema.DateField.formatExternal(new Date(Long.parseLong(value))); // this is declared deprecated in solr 4.2.1 but is still used as done here
}
return value;
}
// XML.escapeCharData(val, writer);
}

View File

@ -55,6 +55,7 @@ import net.yacy.crawler.retrieval.HTTPLoader;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.document.TextParser;
import net.yacy.kelondro.data.citation.CitationReference;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.workflow.WorkflowProcessor;
@ -347,17 +348,10 @@ public final class CrawlStacker {
// check availability of parser and maxfilesize
String warning = null;
boolean loadImages = Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
if (!loadImages && Switchboard.getSwitchboard().getConfig(SwitchboardConstants.CRAWLER_LOAD_IMAGE, "").equals("true;")) {
// dammit semicolon
// TODO: remove this shit later
Switchboard.getSwitchboard().setConfig(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
loadImages = true;
}
ContentDomain contentDomain = entry.url().getContentDomainFromExt();
if ((maxFileSize >= 0 && entry.size() > maxFileSize) ||
contentDomain == ContentDomain.APP ||
(!loadImages && contentDomain == ContentDomain.IMAGE) ||
(contentDomain == ContentDomain.IMAGE && TextParser.supportsExtension(entry.url()) != null) ||
contentDomain == ContentDomain.AUDIO ||
contentDomain == ContentDomain.VIDEO ||
contentDomain == ContentDomain.CTRL) {
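The new image condition leans on YaCy's parser-support convention: TextParser.supportsExtension(url) returns null when a parser exists for the URL's file extension, and otherwise returns the reason string. A convention sketch:

    // supportsExtension returns a reason string, not a boolean
    String error = TextParser.supportsExtension(entry.url());
    // reject an image only when no parser can treat it as a document
    boolean rejectImage = (contentDomain == ContentDomain.IMAGE && error != null);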

View File

@ -182,6 +182,14 @@ public final class Cache {
public static long getActualCacheSize() {
return fileDBunbuffered.length();
}
/**
* get the number of documents currently held in the cache
* @return document count of the cache
*/
public static long getActualCacheDocCount() {
return fileDBunbuffered.size();
}
/**
* close the databases

View File

@ -41,7 +41,10 @@ import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.index.RowHandleSet;
public class BookmarksDB {
@ -147,11 +150,6 @@ public class BookmarksDB {
ConcurrentLog.logException(e);
}
}
public String addBookmark(final Bookmark bookmark){
saveBookmark(bookmark);
return bookmark.getUrlHash();
}
public Bookmark getBookmark(final String urlHash) throws IOException {
try {
@ -214,18 +212,13 @@ public class BookmarksDB {
final TreeSet<String> set=new TreeSet<String>(new bookmarkComparator(true));
final String tagHash=BookmarkHelper.tagHash(tagName);
final Tag tag=getTag(tagHash);
Set<String> hashes=new HashSet<String>();
if (tag != null) {
hashes=getTag(tagHash).getUrlHashes();
}
RowHandleSet hashes = tag == null ? new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10) : tag.getUrlHashes();
if (priv) {
set.addAll(hashes);
for (byte[] hash: hashes) set.add(ASCII.String(hash));
} else {
final Iterator<String> it=hashes.iterator();
Bookmark bm;
while(it.hasNext()){
for (byte[] hash: hashes) {
try {
bm = getBookmark(it.next());
Bookmark bm = getBookmark(ASCII.String(hash));
if (bm != null && bm.getPublic()) {
set.add(bm.getUrlHash());
}
@ -249,7 +242,7 @@ public class BookmarksDB {
* retrieve an object of type Tag from the tagCache; if the object is not cached, return loadTag(hash)
* @param hash an object of type String, containing a tagHash
*/
public Tag getTag(final String hash){
private Tag getTag(final String hash){
return this.tags.get(hash); //null if it does not exist
}
@ -257,7 +250,7 @@ public class BookmarksDB {
* store a Tag in tagsTable or remove an empty tag
* @param tag an object of type Tag to be stored/removed
*/
public void putTag(final Tag tag){
private void putTag(final Tag tag){
if (tag == null) return;
if (tag.isEmpty()) {
this.tags.remove(tag.getTagHash());
@ -266,7 +259,7 @@ public class BookmarksDB {
}
}
public void removeTag(final String hash) {
private void removeTag(final String hash) {
this.tags.remove(hash);
}
@ -301,7 +294,7 @@ public class BookmarksDB {
return set.iterator();
}
public Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
private Iterator<Tag> getTagIterator(final String tagName, final boolean priv, final int comp) {
final TreeSet<Tag> set=new TreeSet<Tag>((comp == SORT_SIZE) ? tagSizeComparator : tagComparator);
Iterator<String> it=null;
final Iterator<String> bit=getBookmarksIterator(tagName, priv);
@ -347,14 +340,14 @@ public class BookmarksDB {
final Tag oldTag=getTag(BookmarkHelper.tagHash(oldName));
if (oldTag != null) {
final Set<String> urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
final RowHandleSet urlHashes = oldTag.getUrlHashes(); // preserve urlHashes of oldTag
removeTag(BookmarkHelper.tagHash(oldName)); // remove oldHash from TagsDB
Bookmark bookmark;
Set<String> tagSet = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
for (final String urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
for (final byte[] urlHash : urlHashes) { // looping through all bookmarks which were tagged with oldName
try {
bookmark = getBookmark(urlHash);
bookmark = getBookmark(ASCII.String(urlHash));
tagSet = bookmark.getTags();
tagSet.remove(oldName);
bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt
@ -371,9 +364,9 @@ public class BookmarksDB {
public void addTag(final String selectTag, final String newTag) {
Bookmark bookmark;
for (final String urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
for (final byte[] urlHash : getTag(BookmarkHelper.tagHash(selectTag)).getUrlHashes()) { // looping through all bookmarks which were tagged with selectTag
try {
bookmark = getBookmark(urlHash);
bookmark = getBookmark(ASCII.String(urlHash));
bookmark.addTag(newTag);
saveBookmark(bookmark);
} catch (final IOException e) {
@ -389,51 +382,24 @@ public class BookmarksDB {
* Subclass of bookmarksDB, which provides the Tag object-type
*/
public class Tag {
public static final String URL_HASHES = "urlHashes";
public static final String TAG_NAME = "tagName";
private final String tagHash;
private final Map<String, String> mem;
private Set<String> urlHashes;
private final String tagName;
private RowHandleSet urlHashes;
public Tag(final String hash, final Map<String, String> map){
this.tagHash = hash;
this.mem = map;
if (this.mem.containsKey(URL_HASHES)) {
this.urlHashes = ListManager.string2set(this.mem.get(URL_HASHES));
} else {
this.urlHashes = new HashSet<String>();
}
}
public Tag(final String name, final HashSet<String> entries){
private Tag(final String name) {
this.tagHash = BookmarkHelper.tagHash(name);
this.mem = new HashMap<String, String>();
//mem.put(URL_HASHES, listManager.arraylist2string(entries));
this.urlHashes = entries;
this.mem.put(TAG_NAME, name);
}
public Tag(final String name){
this(name, new HashSet<String>());
}
public Map<String, String> getMap(){
this.mem.put(URL_HASHES, ListManager.collection2string(this.urlHashes));
return this.mem;
this.tagName = name;
this.urlHashes = new RowHandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 10);
}
/**
* get the lowercase Tagname
*/
public String getTagName(){
/*if(this.mem.containsKey(TAG_NAME)){
return (String) this.mem.get(TAG_NAME);
}
return "";*/
return getFriendlyName().toLowerCase();
}
public String getTagHash(){
private String getTagHash(){
return this.tagHash;
}
@ -441,37 +407,33 @@ public class BookmarksDB {
* @return the tag name, with all uppercase chars
*/
public String getFriendlyName(){
/*if(this.mem.containsKey(TAG_FRIENDLY_NAME)){
return (String) this.mem.get(TAG_FRIENDLY_NAME);
}
return getTagName();*/
if(this.mem.containsKey(TAG_NAME)){
return this.mem.get(TAG_NAME);
}
return "notagname";
return this.tagName;
}
public Set<String> getUrlHashes(){
private RowHandleSet getUrlHashes(){
return this.urlHashes;
}
public boolean hasPublicItems(){
private boolean hasPublicItems(){
return getBookmarksIterator(getTagName(), false).hasNext();
}
public void addUrl(final String urlHash){
this.urlHashes.add(urlHash);
private void addUrl(final String urlHash){
try {
this.urlHashes.put(ASCII.getBytes(urlHash));
} catch (SpaceExceededException e) {
}
}
public void delete(final String urlHash){
this.urlHashes.remove(urlHash);
private void delete(final String urlHash){
this.urlHashes.remove(ASCII.getBytes(urlHash));
}
public int size(){
return this.urlHashes.size();
}
public boolean isEmpty() {
private boolean isEmpty() {
return this.urlHashes.isEmpty();
}
}
@ -481,27 +443,19 @@ public class BookmarksDB {
*/
public class Bookmark {
public static final String BOOKMARK_URL = "bookmarkUrl";
private static final String BOOKMARK_URL = "bookmarkUrl";
public static final String BOOKMARK_TITLE = "bookmarkTitle";
public static final String BOOKMARK_DESCRIPTION = "bookmarkDesc";
public static final String BOOKMARK_TAGS = "bookmarkTags";
public static final String BOOKMARK_PUBLIC = "bookmarkPublic";
public static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
public static final String BOOKMARK_OWNER = "bookmarkOwner";
public static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
private static final String BOOKMARK_TAGS = "bookmarkTags";
private static final String BOOKMARK_PUBLIC = "bookmarkPublic";
private static final String BOOKMARK_TIMESTAMP = "bookmarkTimestamp";
private static final String BOOKMARK_OWNER = "bookmarkOwner";
private static final String BOOKMARK_IS_FEED = "bookmarkIsFeed";
private final String urlHash;
private Set<String> tagNames;
private long timestamp;
private final Map<String, String> entry;
public Bookmark(final String urlHash, final Map<String, String> map) {
this.entry = map;
this.urlHash = urlHash;
this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
loadTimestamp();
}
public Bookmark(final DigestURL url) {
this.entry = new HashMap<String, String>();
this.urlHash = ASCII.String(url.hash());
@ -529,11 +483,15 @@ public class BookmarksDB {
this(new DigestURL((url.indexOf("://") < 0) ? "http://" + url : url));
}
public Bookmark(final Map<String, String> map) throws MalformedURLException {
this(ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash()), map);
private Bookmark(final Map<String, String> map) throws MalformedURLException {
this.entry = map;
this.urlHash = ASCII.String((new DigestURL(map.get(BOOKMARK_URL))).hash());
this.tagNames = new TreeSet<String>(String.CASE_INSENSITIVE_ORDER);
if (map.containsKey(BOOKMARK_TAGS)) this.tagNames.addAll(ListManager.string2set(map.get(BOOKMARK_TAGS)));
loadTimestamp();
}
Map<String, String> toMap() {
private Map<String, String> toMap() {
this.entry.put(BOOKMARK_TAGS, ListManager.collection2string(this.tagNames));
this.entry.put(BOOKMARK_TIMESTAMP, String.valueOf(this.timestamp));
return this.entry;
@ -688,11 +646,11 @@ public class BookmarksDB {
/**
* Subclass of bookmarksDB, which provides the bookmarkIterator object-type
*/
public class bookmarkIterator implements Iterator<Bookmark> {
private class bookmarkIterator implements Iterator<Bookmark> {
Iterator<byte[]> bookmarkIter;
public bookmarkIterator(final boolean up) throws IOException {
private bookmarkIterator(final boolean up) throws IOException {
//flushBookmarkCache(); //XXX: this will cost performance
this.bookmarkIter = BookmarksDB.this.bookmarks.keys(up, false);
//this.nextEntry = null;
@ -722,14 +680,14 @@ public class BookmarksDB {
/**
* Comparator to sort objects of type Bookmark according to their timestamps
*/
public class bookmarkComparator implements Comparator<String> {
private class bookmarkComparator implements Comparator<String> {
private final boolean newestFirst;
/**
* @param newestFirst newest first, or oldest first?
*/
public bookmarkComparator(final boolean newestFirst){
private bookmarkComparator(final boolean newestFirst){
this.newestFirst = newestFirst;
}
@ -752,13 +710,13 @@ public class BookmarksDB {
}
}
public static final TagComparator tagComparator = new TagComparator();
public static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
private static final TagComparator tagComparator = new TagComparator();
private static final TagSizeComparator tagSizeComparator = new TagSizeComparator();
/**
* Comparator to sort objects of type Tag according to their names
*/
public static class TagComparator implements Comparator<Tag>, Serializable {
private static class TagComparator implements Comparator<Tag>, Serializable {
/**
* generated serial
@ -772,7 +730,7 @@ public class BookmarksDB {
}
public static class TagSizeComparator implements Comparator<Tag>, Serializable {
private static class TagSizeComparator implements Comparator<Tag>, Serializable {
/**
* generated serial

View File

@ -26,12 +26,15 @@ package net.yacy.document.parser.html;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Contains methods to convert between Unicode and XML/HTML encoding.
*/
public final class CharacterCoding {
/** Ampersand pattern */
public final static Pattern ampPattern = Pattern.compile(Pattern.quote("&amp;"));
/** Ampersand character in unicode encoding. */
private static final char AMP_UNICODE = "\u0026".charAt(0);
/** Ampersand character in HTML encoding. */
@ -276,14 +279,15 @@ public final class CharacterCoding {
}
return sb.toString();
}
/**
* Replaces HTML-encoded characters with unicode representation.
* @param text text with characters to replace
* @return text with replaced characters
*/
public static String html2unicode(final String text) {
public static String html2unicode(String text) {
if (text == null) return null;
text = ampPattern.matcher(text).replaceAll("&"); // sometimes a double-replacement is necessary.
int p = 0, p1, q;
final StringBuilder sb = new StringBuilder(text.length());
String s;
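The added pre-replacement makes doubly-encoded input decode fully in one call; without it, "&amp;lt;" would only come back as "&lt;". Usage sketch:

    // "a &amp;lt; b" is doubly encoded: the ampPattern pass first yields
    // "a &lt; b", which the entity decoding then maps to "a < b"
    String decoded = CharacterCoding.html2unicode("a &amp;lt; b"); // expected: "a < b"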

View File

@ -204,11 +204,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
@Override
public void scrapeText(final char[] newtext, final String insideTag) {
public void scrapeText(final char[] newtext0, final String insideTag) {
// System.out.println("SCRAPE: " + UTF8.String(newtext));
if (insideTag != null && ("script".equals(insideTag) || "style".equals(insideTag))) return;
int p, pl, q, s = 0;
char[] newtext = CharacterCoding.html2unicode(new String(newtext0)).toCharArray();
// match evaluation pattern
this.evaluationScores.match(Element.text, newtext);
@ -466,7 +467,8 @@ public class ContentScraper extends AbstractScraper implements Scraper {
public void scrapeTag1(final String tagname, final Properties tagopts, char[] text) {
// System.out.println("ScrapeTag1: tagname=" + tagname + ", opts=" + tagopts.toString() + ", text=" + UTF8.String(text));
if (tagname.equalsIgnoreCase("a") && text.length < 2048) {
final String href = tagopts.getProperty("href", EMPTY_STRING);
String href = tagopts.getProperty("href", EMPTY_STRING);
href = CharacterCoding.html2unicode(href);
AnchorURL url;
if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
final String ext = MultiProtocolURL.getFileExtension(url.getFileName());

View File

@ -32,27 +32,15 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.Date;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.pdmodel.font.PDCIDFont;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font;
import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDMMType1Font;
import org.apache.pdfbox.pdmodel.font.PDSimpleFont;
import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont;
import org.apache.pdfbox.pdmodel.font.PDType1CFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.util.PDFTextStripper;
import net.yacy.cora.document.id.AnchorURL;
@ -222,25 +210,54 @@ public class pdfParser extends AbstractParser implements Parser {
false,
docDate)};
}
@SuppressWarnings("static-access")
public static void clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes() {
// thank you very much, PDFParser hackers, this font cache will occupy >80MB RAM for a single pdf and then stays forever
// AND I DO NOT EVEN NEED A FONT HERE TO PARSE THE TEXT!
// Don't be so ignorant, just google once "PDFParser OutOfMemoryError" to feel the pain.
PDFont.clearResources();
COSName.clearResources();
PDType1Font.clearResources();
PDTrueTypeFont.clearResources();
PDType0Font.clearResources();
PDType1AfmPfbFont.clearResources();
PDType3Font.clearResources();
PDType1CFont.clearResources();
PDCIDFont.clearResources();
PDCIDFontType0Font.clearResources();
PDCIDFontType2Font.clearResources();
PDMMType1Font.clearResources();
PDSimpleFont.clearResources();
ResourceCleaner cl = new ResourceCleaner();
cl.clearClassResources("org.apache.pdfbox.cos.COSName");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDTrueTypeFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType0Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1AfmPfbFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType3Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDType1CFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFont");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDMMType1Font");
cl.clearClassResources("org.apache.pdfbox.pdmodel.font.PDSimpleFont");
}
@SuppressWarnings({ "unchecked", "rawtypes" })
private static class ResourceCleaner {
Method findLoadedClass;
private ClassLoader sys;
public ResourceCleaner() {
try {
this.findLoadedClass = ClassLoader.class.getDeclaredMethod("findLoadedClass", new Class[] { String.class });
this.findLoadedClass.setAccessible(true);
this.sys = ClassLoader.getSystemClassLoader();
} catch (Throwable e) {
e.printStackTrace();
this.findLoadedClass = null;
this.sys = null;
}
}
public void clearClassResources(String name) {
if (this.findLoadedClass == null) return;
try {
Object pdfparserpainclass = this.findLoadedClass.invoke(this.sys, name);
if (pdfparserpainclass != null) {
Method clearResources = ((Class) pdfparserpainclass).getDeclaredMethod("clearResources", new Class[] {});
if (clearResources != null) clearResources.invoke(null);
}
} catch (Throwable e) {
e.printStackTrace();
}
}
}
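The reflection detour is deliberate: ClassLoader.findLoadedClass returns only classes the JVM has already loaded, so font classes that a parse never touched are not class-loaded merely to clear their caches. Callers keep using the public hook (usage sketch):

    // after a PDF has been parsed, drop PDFBox's static font caches
    pdfParser.clean_up_idiotic_PDFParser_font_cache_which_eats_up_tons_of_megabytes();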
/**

View File

@ -37,12 +37,12 @@ public class CrashProtectionHandler extends HandlerWrapper implements Handler, H
}
private void writeResponse(HttpServletRequest request, HttpServletResponse response, Exception exc) throws IOException {
PrintWriter out = response.getWriter();
out.println("Ops!");
out.println();
out.println("Message: " + exc.getMessage());
exc.printStackTrace(out);
response.setContentType("text/plain");
response.setStatus(500);
PrintWriter out = response.getWriter();
out.println("Ops!");
out.println();
out.println("Message: " + exc.getMessage());
exc.printStackTrace(out);
response.setContentType("text/plain");
response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
}
}

View File

@ -91,7 +91,7 @@ public class ProxyHandler extends AbstractRemoteHandler implements Handler {
HttpServletResponse response) throws IOException, ServletException {
RequestHeader proxyHeaders = convertHeaderFromJetty(request);
final String httpVer = (String) request.getHeader(HeaderFramework.CONNECTION_PROP_HTTP_VER);
final String httpVer = request.getHeader(HeaderFramework.CONNECTION_PROP_HTTP_VER);
setViaHeader (proxyHeaders, httpVer);
proxyHeaders.remove(RequestHeader.KEEP_ALIVE);
proxyHeaders.remove(RequestHeader.CONTENT_LENGTH);

View File

@ -27,7 +27,6 @@ package net.yacy.http;
import java.io.IOException;
import java.io.OutputStream;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

View File

@ -97,11 +97,6 @@ public class TemplateHandler extends AbstractHandler implements Handler {
htDocsPath = Switchboard.getSwitchboard().htDocsPath.getPath();
}
@Override
protected void doStop() throws Exception {
super.doStop();
}
/** Returns a path to the localized or default file according to the parameter localeSelection
* @param path relative from htroot
* @param localeSelection language of localized file; locale.language from switchboard is used if localeSelection.equals("") */

View File

@ -17,13 +17,13 @@ import java.net.SocketException;
*/
public interface YaCyHttpServer {
abstract public void startupServer() throws Exception;
abstract public void stop() throws Exception;
abstract public void setMaxSessionCount(int cnt);
abstract public InetSocketAddress generateSocketAddress(String port) throws SocketException;
abstract public int getMaxSessionCount();
abstract public int getJobCount();
abstract public boolean withSSL();
abstract public void reconnect(int milsec);
abstract public String getVersion();
abstract void startupServer() throws Exception;
abstract void stop() throws Exception;
abstract void setMaxSessionCount(int cnt);
abstract InetSocketAddress generateSocketAddress(String port) throws SocketException;
abstract int getMaxSessionCount();
abstract int getJobCount();
abstract boolean withSSL();
abstract void reconnect(int milsec);
abstract String getVersion();
}

View File

@ -25,7 +25,6 @@
package net.yacy.peers;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -164,7 +163,7 @@ public class Transmission {
final ReferenceContainer<WordReference> c = (remaining >= container.size()) ? container : trimContainer(container, remaining);
// iterate through the entries in the container and check if the reference is in the repository
final List<byte[]> notFoundx = new ArrayList<byte[]>();
Collection<String> testids = new HashSet<String>();
Set<String> testids = new HashSet<String>();
Iterator<WordReference> i = c.entries();
while (i.hasNext()) {
final WordReference e = i.next();

View File

@ -129,7 +129,7 @@ public class ResourceObserver {
if(MemoryControl.properState()) return Space.HIGH;
// clear some caches - @all: are there more of these, we could clear here?
this.sb.index.clearCache();
this.sb.index.clearCaches();
SearchEventCache.cleanupEvents(true);
this.sb.trail.clear();
Switchboard.urlBlacklist.clearblacklistCache();

View File

@ -1585,7 +1585,7 @@ public final class Switchboard extends serverSwitch {
* @param ids a collection of url hashes
* @return a map from the hash id to: if it exists, the name of the database, otherwise null
*/
public Map<String, HarvestProcess> urlExists(final Collection<String> ids) {
public Map<String, HarvestProcess> urlExists(final Set<String> ids) {
Set<String> e = this.index.exists(ids);
Map<String, HarvestProcess> m = new HashMap<String, HarvestProcess>();
for (String id: ids) {
@ -2031,7 +2031,7 @@ public final class Switchboard extends serverSwitch {
// clear caches if necessary
if ( !MemoryControl.request(128000000L, false) ) {
this.index.clearCache();
this.index.clearCaches();
SearchEventCache.cleanupEvents(false);
this.trail.clear();
GuiHandler.clear();
@ -2556,12 +2556,16 @@ public final class Switchboard extends serverSwitch {
) {
// get the hyperlinks
final Map<DigestURL, String> hl = Document.getHyperlinks(documents);
boolean loadImages = getConfigBool(SwitchboardConstants.CRAWLER_LOAD_IMAGE, true);
if (loadImages) hl.putAll(Document.getImagelinks(documents));
for (Map.Entry<DigestURL, String> entry: Document.getImagelinks(documents).entrySet()) {
if (TextParser.supportsExtension(entry.getKey()) == null) hl.put(entry.getKey(), entry.getValue());
}
// add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links
if (response.profile().directDocByURL()) {
if (!loadImages) hl.putAll(Document.getImagelinks(documents));
for (Map.Entry<DigestURL, String> entry: Document.getImagelinks(documents).entrySet()) {
if (TextParser.supportsExtension(entry.getKey()) != null) hl.put(entry.getKey(), entry.getValue());
}
hl.putAll(Document.getApplinks(documents));
hl.putAll(Document.getVideolinks(documents));
hl.putAll(Document.getAudiolinks(documents));
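The two loops above split image links by parseability: judging from its use here, TextParser.supportsExtension returns null when a parser is available for the URL's file extension and an error string otherwise. Restated as a single sketch with the same names as in the diff:

    for (Map.Entry<DigestURL, String> e : Document.getImagelinks(documents).entrySet()) {
        if (TextParser.supportsExtension(e.getKey()) == null) {
            hl.put(e.getKey(), e.getValue());      // parseable image: crawl and index as document
        } else if (response.profile().directDocByURL()) {
            hl.put(e.getKey(), e.getValue());      // unparseable image: stack as pure link (NOLOAD queue)
        }
    }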
@@ -2905,7 +2909,7 @@ public final class Switchboard extends serverSwitch {
// stacking may fail because of double occurrences of that url. Therefore
// we must wait here until the url has actually disappeared
int t = 100;
Collection<String> ids = new ArrayList<String>(1); ids.add(ASCII.String(urlhash));
Set<String> ids = new HashSet<String>(1); ids.add(ASCII.String(urlhash));
while (t-- > 0 && this.index.exists(ids).size() > 0) {
try {Thread.sleep(100);} catch (final InterruptedException e) {}
ConcurrentLog.fine("Switchboard", "STACKURL: waiting for deletion, t=" + t);

View File

@@ -323,7 +323,6 @@ public final class SwitchboardConstants {
* <p><code>public static final String <strong>CRAWLER_THREADS_ACTIVE_MAX</strong> = "crawler.MaxActiveThreads"</code></p>
* <p>Name of the setting for the maximum number of crawler threads that may be active at the same time</p>
*/
public static final String CRAWLER_LOAD_IMAGE = "crawler.load.image";
public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads";
public static final String CRAWLER_FOLLOW_REDIRECTS = "crawler.http.FollowRedirects"; // ignore the target url and follow to the redirect
public static final String CRAWLER_RECORD_REDIRECTS = "crawler.http.RecordRedirects"; // record the ignored redirected page to the index store

View File

@@ -225,10 +225,10 @@ public final class Fulltext {
}
}
public void clearCache() {
public void clearCaches() {
if (this.urlIndexFile != null && this.urlIndexFile instanceof Cache) ((Cache) this.urlIndexFile).clearCache();
if (this.statsDump != null) this.statsDump.clear();
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
this.statsDump = null;
}
@@ -250,7 +250,7 @@ public final class Fulltext {
for (String name: instance.getCoreNames()) new EmbeddedSolrConnector(instance, name).clear();
}
this.commit(false);
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
}
}
@@ -260,7 +260,7 @@ public final class Fulltext {
if (instance != null) {
for (String name: instance.getCoreNames()) new RemoteSolrConnector(instance, name).clear();
}
this.solrInstances.clearCache();
this.solrInstances.clearCaches();
}
}
@@ -400,7 +400,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
public void putEdges(final Collection<SolrInputDocument> edges) throws IOException {
@@ -412,7 +412,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
/**
@@ -432,7 +432,7 @@ public final class Fulltext {
throw new IOException(e.getMessage(), e);
}
this.statsDump = null;
if (MemoryControl.shortStatus()) clearCache();
if (MemoryControl.shortStatus()) clearCaches();
}
/**
@@ -617,10 +617,11 @@ public final class Fulltext {
* @param ids
* @return a set of ids which exist in the database
*/
public Set<String> exists(Collection<String> ids) {
public Set<String> exists(Set<String> ids) {
HashSet<String> e = new HashSet<String>();
if (ids == null || ids.size() == 0) return e;
Collection<String> idsC = new HashSet<String>();
if (ids.size() == 1) return exists(ids.iterator().next()) ? ids : e;
Set<String> idsC = new HashSet<String>();
idsC.addAll(ids);
if (this.urlIndexFile != null) {
Iterator<String> idsi = idsC.iterator();
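The new fast path short-circuits singleton lookups before the defensive copy is made; a hypothetical call (the hash value and the fulltext reference are invented for illustration):

    Set<String> one = new HashSet<String>(1);
    one.add("CqdIBT7OM6p-");                  // hypothetical 12-character YaCy URL hash
    Set<String> found = fulltext.exists(one); // returns 'one' if the hash is stored, the empty set otherwise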
@@ -751,12 +752,12 @@ public final class Fulltext {
}
// export methods
public Export export(final File f, final String filter, final int format, final boolean dom) {
public Export export(final File f, final String filter, final String query, final int format, final boolean dom) {
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
ConcurrentLog.warn("LURL-EXPORT", "cannot start another export thread, already one running");
return this.exportthread;
}
this.exportthread = new Export(f, filter, format, dom);
this.exportthread = new Export(f, filter, query, format, dom);
this.exportthread.start();
return this.exportthread;
}
@@ -769,14 +770,15 @@ public final class Fulltext {
private final File f;
private final Pattern pattern;
private int count;
private String failure;
private String failure, query;
private final int format;
private final boolean dom;
private Export(final File f, final String filter, final int format, boolean dom) {
private Export(final File f, final String filter, final String query, final int format, boolean dom) {
// format: 0=text, 1=html, 2=rss/xml
this.f = f;
this.pattern = filter == null ? null : Pattern.compile(filter);
this.query = query == null? "*:*" : query;
this.count = 0;
this.failure = null;
this.format = format;
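Since "*:*" is Solr's match-all query, a null query argument keeps the exported result set identical to the previous behavior.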
@@ -805,7 +807,7 @@ public final class Fulltext {
if (this.dom) {
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> stats = scores.get(CollectionSchema.host_s.getSolrFieldName());
for (final String host: stats) {
if (this.pattern != null && !this.pattern.matcher(host).matches()) continue;
@@ -814,21 +816,19 @@ public final class Fulltext {
this.count++;
}
} else {
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(),
CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName());
SolrDocument doc;
ArrayList<?> title;
String url, author, hash;
String[] descriptions;
String url, hash, title, author, description;
Integer size;
Date date;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
title = (ArrayList<?>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
author = (String) doc.getFieldValue(CollectionSchema.author.getSolrFieldName());
descriptions = (String[]) doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName());
hash = getStringFrom(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
url = getStringFrom(doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
title = getStringFrom(doc.getFieldValue(CollectionSchema.title.getSolrFieldName()));
author = getStringFrom(doc.getFieldValue(CollectionSchema.author.getSolrFieldName()));
description = getStringFrom(doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName()));
size = (Integer) doc.getFieldValue(CollectionSchema.size_i.getSolrFieldName());
date = (Date) doc.getFieldValue(CollectionSchema.last_modified.getSolrFieldName());
if (this.pattern != null && !this.pattern.matcher(url).matches()) continue;
@@ -836,16 +836,14 @@ public final class Fulltext {
pw.println(url);
}
if (this.format == 1) {
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</a>");
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml(title, true) + "</a>");
}
if (this.format == 2) {
pw.println("<item>");
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</title>");
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml(title, true) + "</title>");
pw.println("<link>" + MultiProtocolURL.escape(url) + "</link>");
if (author != null && !author.isEmpty()) pw.println("<author>" + CharacterCoding.unicode2xml(author, true) + "</author>");
if (descriptions != null && descriptions.length > 0) {
for (String d: descriptions) pw.println("<description>" + CharacterCoding.unicode2xml(d, true) + "</description>");
}
if (description != null && !description.isEmpty()) pw.println("<description>" + CharacterCoding.unicode2xml(description, true) + "</description>");
if (date != null) pw.println("<pubDate>" + HeaderFramework.formatRFC1123(date) + "</pubDate>");
if (size != null) pw.println("<yacy:size>" + size.intValue() + "</yacy:size>");
pw.println("<guid isPermaLink=\"false\">" + hash + "</guid>");
@@ -883,6 +881,13 @@ public final class Fulltext {
public int count() {
return this.count;
}
@SuppressWarnings("unchecked")
private String getStringFrom(final Object o) {
if (o == null) return "";
if (o instanceof ArrayList) return ((ArrayList<String>) o).get(0);
return (String) o;
}
}
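A hypothetical call with the extended export signature (file name and query are invented; format 2 selects rss/xml per the constructor comment):

    Fulltext.Export job = fulltext.export(new File("dump.rss"), ".*", "text_t:yacy", 2, false);
    while (job.isAlive()) Thread.sleep(1000); // Export runs as a background thread; InterruptedException handling omitted
    System.out.println("exported " + job.count() + " documents");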

View File

@@ -29,7 +29,6 @@ package net.yacy.search.index;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
@@ -443,7 +442,7 @@ public class Segment {
* @param ids
* @return a set of ids which exist in the database
*/
public Set<String> exists(final Collection<String> ids) {
public Set<String> exists(final Set<String> ids) {
return this.fulltext.exists(ids);
}
@@ -504,10 +503,10 @@ public class Segment {
}
}
public void clearCache() {
public void clearCaches() {
if (this.urlCitationIndex != null) this.urlCitationIndex.clearCache();
if (this.termIndex != null) this.termIndex.clearCache();
this.fulltext.clearCache();
this.fulltext.clearCaches();
}
public File getLocation() {

View File

@@ -242,7 +242,8 @@ public class QueryGoal {
// add filter to prevent that results come from failed urls
q.append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200").append(" AND (");
q.append(CollectionSchema.images_urlstub_sxt.getSolrFieldName()).append(":[* TO *] OR ");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif))");
q.append(CollectionSchema.url_file_ext_s.getSolrFieldName()).append(":(jpg OR png OR gif) OR ");
q.append(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*))");
// parse special requests
if (isCatchall()) return q;
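With the space restored after the second OR, the assembled image filter reads (field names as resolved by getSolrFieldName()):

    httpstatus_i:200 AND (images_urlstub_sxt:[* TO *] OR url_file_ext_s:(jpg OR png OR gif) OR content_type:(image/*))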

View File

@@ -898,17 +898,19 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
String query = (harvestkey == null || !segment.fulltext().getDefaultConfiguration().contains(CollectionSchema.harvestkey_s) ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":" + ProcessType.CITATION.toString();
hostscore = collectionConnector.getFacets(query, 10000000, CollectionSchema.host_s.getSolrFieldName()).get(CollectionSchema.host_s.getSolrFieldName());
if (hostscore == null) hostscore = new ClusteredScoreMap<String>();
ConcurrentLog.info("CollectionConfiguration", "collecting " + hostscore.size() + " hosts");
int countcheck = 0;
for (String host: hostscore.keyList(true)) {
// Patch the citation index for links with canonical tags.
// This shall fulfill the following requirement:
// If a document A links to B and B contains a 'canonical C', then the citation rank coputation shall consider that A links to C and B does not link to C.
// If a document A links to B and B contains a 'canonical C', then the citation rank computation shall consider that A links to C and B does not link to C.
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
String patchquery = CollectionSchema.host_s.getSolrFieldName() + ":" + host + " AND " + CollectionSchema.canonical_s.getSolrFieldName() + ":[* TO *]";
long patchquerycount = collectionConnector.getCountByQuery(patchquery);
BlockingQueue<SolrDocument> documents_with_canonical_tag = collectionConnector.concurrentDocumentsByQuery(patchquery, 0, 10000000, 60000L, 50,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.canonical_s.getSolrFieldName());
SolrDocument doc_B;
int patchquerycountcheck = 0;
try {
while ((doc_B = documents_with_canonical_tag.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
// find all documents which link to the canonical doc
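The canonical patch in one picture: if A links to B and B declares C as canonical, the citation index is rewritten so that A cites C directly:

    A --links--> B,   B contains <link rel="canonical" href="C">
    before the patch: A cites B
    after the patch:  A cites C  (and B does not cite C)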
@@ -926,10 +928,12 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
CitationReference doc_A_citation = doc_A_ids_iterator.next();
segment.urlCitation().add(doc_C_url.hash(), doc_A_citation);
}
patchquerycountcheck++;
}
} catch (InterruptedException e) {
} catch (SpaceExceededException e) {
}
if (patchquerycount != patchquerycountcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous patchquery count for host " + host + ": expected=" + patchquerycount + ", counted=" + patchquerycountcheck);
// do the citation rank computation
if (hostscore.get(host) <= 0) continue;
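The expected-versus-counted bookkeeping introduced here follows one pattern throughout this change; in outline (a sketch, exception handling omitted):

    long expected = connector.getCountByQuery(query); // what Solr reports for the query
    int counted = 0;
    while ((doc = queue.take()) != AbstractSolrConnector.POISON_DOCUMENT) counted++;
    if (expected != counted) // the document queue may terminate early, e.g. on a timeout
        ConcurrentLog.warn("CollectionConfiguration", "ambiguous count: expected=" + expected + ", counted=" + counted);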
@@ -939,12 +943,14 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
while (convergence_attempts++ < 30) {
if (crh.convergenceStep()) break;
}
ConcurrentLog.info("CollectionConfiguration.CRHost", "convergence for host " + host + " after " + convergence_attempts + " steps");
ConcurrentLog.info("CollectionConfiguration", "convergence for host " + host + " after " + convergence_attempts + " steps");
// we have now the cr for all documents of a specific host; we store them for later use
Map<byte[], CRV> crn = crh.normalize();
//crh.log(crn);
ranking.putAll(crn); // accumulate this here for usage in document update later
countcheck++;
}
if (hostscore.size() != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous host count: expected=" + hostscore.size() + ", counted=" + countcheck);
} catch (final IOException e2) {
hostscore = new ClusteredScoreMap<String>();
}
@@ -952,13 +958,15 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// process all documents at the webgraph for the outgoing links of this document
SolrDocument doc;
if (webgraphConnector != null) {
for (String host: hostscore.keyList(true)) {
if (hostscore.get(host) <= 0) continue;
// select all webgraph edges and modify their cr value
BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(
WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\"",
0, 10000000, 60000, 50);
try {
try {
for (String host: hostscore.keyList(true)) {
if (hostscore.get(host) <= 0) continue;
// select all webgraph edges and modify their cr value
String query = WebgraphSchema.source_host_s.getSolrFieldName() + ":\"" + host + "\"";
long count = webgraphConnector.getCountByQuery(query);
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the webgraph");
BlockingQueue<SolrDocument> docs = webgraphConnector.concurrentDocumentsByQuery(query, 0, 10000000, 60000, 50);
int countcheck = 0;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
boolean changed = false;
SolrInputDocument sid = segment.fulltext().getWebgraphConfiguration().toSolrInputDocument(doc, null);
@@ -978,21 +986,29 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
webgraphConnector.add(sid);
} catch (SolrException e) {
} catch (IOException e) {
}
}
countcheck++;
}
} catch (final InterruptedException e) {}
if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous webgraph document count for host " + host + ": expected=" + count + ", counted=" + countcheck);
}
} catch (final IOException e2) {
ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
} catch (final InterruptedException e3) {
ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
}
}
// process all documents in collection
BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQuery(
(harvestkey == null ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]",
0, 10000, 60000, 50);
String query = (harvestkey == null ? "" : CollectionSchema.harvestkey_s.getSolrFieldName() + ":\"" + harvestkey + "\" AND ") +
CollectionSchema.process_sxt.getSolrFieldName() + ":[* TO *]";
int proccount = 0, proccount_clickdepthchange = 0, proccount_referencechange = 0, proccount_citationchange = 0, proccount_uniquechange = 0;
Map<String, Long> hostExtentCache = new HashMap<String, Long>(); // a mapping from the host id to the number of documents which contain this host-id
Set<String> uniqueURLs = new HashSet<String>();
try {
long count = collectionConnector.getCountByQuery(query);
ConcurrentLog.info("CollectionConfiguration", "collecting " + count + " documents from the collection for harvestkey " + harvestkey);
BlockingQueue<SolrDocument> docs = collectionConnector.concurrentDocumentsByQuery(query, 0, 10000, 60000, 50);
int countcheck = 0;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
// for each to-be-processed entry work on the process tag
Collection<Object> proctags = doc.getFieldValues(CollectionSchema.process_sxt.getSolrFieldName());
@@ -1031,8 +1047,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (!hostExtentCache.containsKey(hosthash)) {
StringBuilder q = new StringBuilder();
q.append(CollectionSchema.host_id_s.getSolrFieldName()).append(":\"").append(hosthash).append("\" AND ").append(CollectionSchema.httpstatus_i.getSolrFieldName()).append(":200");
long count = segment.fulltext().getDefaultConnector().getCountByQuery(q.toString());
hostExtentCache.put(hosthash, count);
long hostExtentCount = segment.fulltext().getDefaultConnector().getCountByQuery(q.toString());
hostExtentCache.put(hosthash, hostExtentCount);
}
if (postprocessing_references(rrCache, doc, sid, url, hostExtentCache)) proccount_referencechange++;
@@ -1047,13 +1063,18 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
proccount++;
} catch (final Throwable e1) {
}
countcheck++;
}
if (count != countcheck) ConcurrentLog.warn("CollectionConfiguration", "ambiguous collection document count for harvestkey " + harvestkey + ": expected=" + count + ", counted=" + countcheck);
ConcurrentLog.info("CollectionConfiguration", "cleanup_processing: re-calculated " + proccount+ " new documents, " +
proccount_clickdepthchange + " clickdepth changes, " +
proccount_referencechange + " reference-count changes, " +
proccount_uniquechange + " unique field changes, " +
proccount_citationchange + " citation ranking changes.");
} catch (final InterruptedException e) {
} catch (final InterruptedException e2) {
ConcurrentLog.warn("CollectionConfiguration", e2.getMessage(), e2);
} catch (IOException e3) {
ConcurrentLog.warn("CollectionConfiguration", e3.getMessage(), e3);
}
return proccount;
}
@@ -1148,8 +1169,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
if (entry == null || entry.getValue() == null) continue;
try {
String url = (String) connector.getDocumentById(ASCII.String(entry.getKey()), CollectionSchema.sku.getSolrFieldName()).getFieldValue(CollectionSchema.sku.getSolrFieldName());
ConcurrentLog.info("CollectionConfiguration.CRHost", "CR for " + url);
ConcurrentLog.info("CollectionConfiguration.CRHost", ">> " + entry.getValue().toString());
ConcurrentLog.info("CollectionConfiguration", "CR for " + url);
ConcurrentLog.info("CollectionConfiguration", ">> " + entry.getValue().toString());
} catch (final IOException e) {
ConcurrentLog.logException(e);
}