From 85a5487d6dc2f48757e66a68fcbae534824f4745 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 13 Sep 2011 14:39:41 +0000
Subject: [PATCH] YaCy can now use the solr index to compute text snippets.
 This makes search result preparation MUCH faster because no document fetching
 and parsing is necessary any more.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7943 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/IndexControlRWIs_p.java                |  5 +-
 htroot/IndexFederated_p.java                  | 20 ++--
 source/de/anomic/crawler/CrawlQueues.java     | 11 ++-
 source/de/anomic/crawler/ZURL.java            |  5 +-
 source/de/anomic/search/ResultFetcher.java    | 24 ++++-
 source/de/anomic/search/Segment.java          | 12 +++
 source/de/anomic/search/Segments.java         | 64 ++++++------
 source/de/anomic/search/Switchboard.java      | 10 +-
 source/de/anomic/search/TextSnippet.java      | 58 ++++++-----
 .../federated/solr/SolrChardingConnector.java | 11 +--
 .../federated/solr/SolrConnector.java         | 99 +++++++++++++++++++
 .../services/federated/solr/SolrScheme.java   | 43 ++++++++
 .../federated/solr/SolrSingleConnector.java   | 29 +++++-
 source/net/yacy/document/Document.java        |  8 +-
 14 files changed, 304 insertions(+), 95 deletions(-)
 create mode 100644 source/net/yacy/cora/services/federated/solr/SolrConnector.java

diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
index a8541602c..5339a6775 100644
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@@ -66,6 +66,7 @@ import de.anomic.search.RankingProcess;
 import de.anomic.search.ReferenceOrder;
 import de.anomic.search.SearchEventCache;
 import de.anomic.search.Segment;
+import de.anomic.search.Segments;
 import de.anomic.search.Switchboard;
 import de.anomic.search.SwitchboardConstants;
 import de.anomic.server.serverObjects;
@@ -86,7 +87,7 @@ public class IndexControlRWIs_p {
         prop.put("keyhash", "");
         prop.put("result", "");
         prop.put("cleanup", post == null || post.containsKey("maxReferencesLimit") ? 1 : 0);
-        prop.put("cleanup_solr", sb.solrConnector == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
+        prop.put("cleanup_solr", sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null || !sb.getConfigBool("federated.service.solr.indexing.enabled", false) ? 0 : 1);
 
         String segmentName = sb.getConfig(SwitchboardConstants.SEGMENT_PUBLIC, "default");
         int i = 0;
@@ -157,7 +158,7 @@ public class IndexControlRWIs_p {
                     segment.clear();
                 }
                 if (post.get("deleteSolr", "").equals("on") && sb.getConfigBool("federated.service.solr.indexing.enabled", false)) try {
-                    sb.solrConnector.clear();
+                    sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().clear();
                 } catch (final Exception e) {
                     Log.logException(e);
                 }
diff --git a/htroot/IndexFederated_p.java b/htroot/IndexFederated_p.java
index dd798dcd0..9777cf341 100644
--- a/htroot/IndexFederated_p.java
+++ b/htroot/IndexFederated_p.java
@@ -33,9 +33,12 @@ import net.yacy.cora.document.UTF8;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.services.federated.solr.SolrChardingConnector;
 import net.yacy.cora.services.federated.solr.SolrChardingSelection;
+import net.yacy.cora.services.federated.solr.SolrConnector;
 import net.yacy.cora.services.federated.solr.SolrScheme;
+import net.yacy.cora.services.federated.solr.SolrSingleConnector;
 import net.yacy.cora.storage.ConfigurationSet;
 import net.yacy.kelondro.logging.Log;
+import de.anomic.search.Segments;
 import de.anomic.search.Switchboard;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -75,8 +78,8 @@ public class IndexFederated_p {
 
             if (solrWasOn) {
                 // switch off
-                sb.solrConnector.close();
-                sb.solrConnector = null;
+                sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().close();
+                sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
             }
 
             final SolrScheme scheme = new SolrScheme(new File(env.getDataPath(), "DATA/SETTINGS/" + schemename));
@@ -85,10 +88,10 @@ public class IndexFederated_p {
                 // switch on
                 final boolean usesolr = sb.getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
                 try {
-                    sb.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
+                    sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr((usesolr) ? new SolrChardingConnector(solrurls, scheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null);
                 } catch (final IOException e) {
                     Log.logException(e);
-                    sb.solrConnector = null;
+                    sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
                 }
             }
 
@@ -110,12 +113,13 @@ public class IndexFederated_p {
         }
 
         // show solr host table
-        if (sb.solrConnector == null) {
+        if (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null) {
             prop.put("table", 0);
         } else {
             prop.put("table", 1);
-            final long[] size = sb.solrConnector.getSizeList();
-            final String[] urls = sb.solrConnector.getAdminInterfaceList();
+            final SolrConnector solr = sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr();
+            final long[] size = (solr instanceof SolrChardingConnector) ? ((SolrChardingConnector) solr).getSizeList() : new long[]{((SolrSingleConnector) solr).getSize()};
+            final String[] urls = (solr instanceof SolrChardingConnector) ? ((SolrChardingConnector) solr).getAdminInterfaceList() : new String[]{((SolrSingleConnector) solr).getAdminInterface()};
             boolean dark = false;
             for (int i = 0; i < size.length; i++) {
                 prop.put("table_list_" + i + "_dark", dark ? 1 : 0); dark = !dark;
@@ -126,7 +130,7 @@ public class IndexFederated_p {
         }
 
         // write scheme
-        SolrScheme scheme = (sb.solrConnector == null) ? null : sb.solrConnector.getScheme();
+        SolrScheme scheme = (sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() == null) ? null : sb.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().getScheme();
         final String schemename = sb.getConfig("federated.service.solr.indexing.schemefile", "solr.keys.default.list");
         if (scheme == null) {
             scheme = new SolrScheme(new File(env.getDataPath(), "DATA/SETTINGS/" + schemename));
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 4d6e9099f..4cc879a69 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -62,6 +62,7 @@ public class CrawlQueues {
 
     private static final String ERROR_DB_FILENAME = "urlError3.db";
     private static final String DELEGATED_DB_FILENAME = "urlDelegated3.db";
+    private static final Segments.Process PROCESS = Segments.Process.LOCALCRAWLING;
 
     protected Switchboard sb;
     protected Log log;
@@ -81,8 +82,8 @@ public class CrawlQueues {
         this.log.logConfig("Starting Crawling Management");
         this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
         FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(sb.solrConnector, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        this.delegatedURL = new ZURL(sb.solrConnector, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+        this.errorURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+        this.delegatedURL = new ZURL(sb.indexSegments.segment(PROCESS).getSolr(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
     }
 
     public void relocate(final File newQueuePath) {
@@ -93,8 +94,8 @@ public class CrawlQueues {
 
         this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
         FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(this.sb.solrConnector, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-        this.delegatedURL = new ZURL(this.sb.solrConnector, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
+        this.errorURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
+        this.delegatedURL = new ZURL(this.sb.indexSegments.segment(PROCESS).getSolr(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
     }
 
     public void close() {
@@ -249,7 +250,7 @@ public class CrawlQueues {
                         return true;
                     }
                     try {
-                        this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(Segments.Process.LOCALCRAWLING, new Response(urlEntry, profile), null, null));
+                        this.sb.indexingDocumentProcessor.enQueue(new indexingQueueEntry(PROCESS, new Response(urlEntry, profile), null, null));
                         Log.logInfo("CrawlQueues", "placed NOLOAD URL on indexing queue: " + urlEntry.url().toNormalform(true, false));
                     } catch (final InterruptedException e) {
                         Log.logException(e);
diff --git a/source/de/anomic/crawler/ZURL.java b/source/de/anomic/crawler/ZURL.java
index 0b14e5e44..7ebc1b223 100755
--- a/source/de/anomic/crawler/ZURL.java
+++ b/source/de/anomic/crawler/ZURL.java
@@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
 import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.services.federated.solr.SolrChardingConnector;
+import net.yacy.cora.services.federated.solr.SolrConnector;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.data.word.Word;
 import net.yacy.kelondro.index.Index;
@@ -76,10 +77,10 @@ public class ZURL implements Iterable<ZURL.Entry> {
     // the class object
     private Index urlIndex;
     private final ConcurrentLinkedQueue<byte[]> stack;
-    private final SolrChardingConnector solrConnector;
+    private final SolrConnector solrConnector;
 
     public ZURL(
-            final SolrChardingConnector solrConnector,
+            final SolrConnector solrConnector,
     		final File cachePath,
     		final String tablename,
     		final boolean startWithEmptyFile,
diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java
index 04767a074..6b2cbd6cc 100644
--- a/source/de/anomic/search/ResultFetcher.java
+++ b/source/de/anomic/search/ResultFetcher.java
@@ -31,11 +31,13 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.regex.Pattern;
 
+import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.ranking.ScoreMap;
 import net.yacy.cora.ranking.WeakPriorityBlockingQueue;
 import net.yacy.cora.ranking.WeakPriorityBlockingQueue.ReverseElement;
+import net.yacy.cora.services.federated.solr.SolrConnector;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.kelondro.data.meta.URIMetadataRow;
@@ -46,6 +48,10 @@ import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.util.EventTracker;
 import net.yacy.kelondro.util.MemoryControl;
 import net.yacy.repository.LoaderDispatcher;
+
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+
 import de.anomic.data.WorkTables;
 import de.anomic.http.client.Cache;
 import de.anomic.yacy.yacySeedDB;
@@ -322,6 +328,7 @@ public class ResultFetcher {
         private final int neededResults;
         private final Pattern snippetPattern;
         private boolean shallrun;
+        private final SolrConnector solr;
 
         public Worker(final int id, final long maxlifetime, final CacheStrategy cacheStrategy, final Pattern snippetPattern, final int neededResults) {
             this.id = id;
@@ -331,6 +338,7 @@ public class ResultFetcher {
             this.timeout = System.currentTimeMillis() + Math.max(1000, maxlifetime);
             this.neededResults = neededResults;
             this.shallrun = true;
+            this.solr = ResultFetcher.this.rankingProcess.getQuery().getSegment().getSolr();
         }
 
         @Override
@@ -373,8 +381,18 @@ public class ResultFetcher {
                     }
                     if (ResultFetcher.this.query.filterfailurls && ResultFetcher.this.workTables.failURLsContains(page.hash())) continue;
 
+                    // if a solr index is attached, also load the solr document for this page
+                    String solrContent = null;
+                    if (this.solr != null) {
+                        SolrDocument sd = null;
+                        final SolrDocumentList sdl = this.solr.get("id:" + ASCII.String(page.hash()), 0, 1);
+                        if (sdl.size() > 0) sd = sdl.get(0);
+                        if (sd != null) solrContent = this.solr.getScheme().solrGetText(sd);
+                    }
+
+
                     loops++;
-                    resultEntry = fetchSnippet(page, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
+                    resultEntry = fetchSnippet(page, solrContent, this.cacheStrategy); // does not fetch snippets if snippetMode == 0
                     if (resultEntry == null) continue; // the entry had some problems, cannot be used
                     rawLine = resultEntry.textSnippet() == null ? null : resultEntry.textSnippet().getLineRaw();
                     //System.out.println("***SNIPPET*** raw='" + rawLine + "', pattern='" + this.snippetPattern.toString() + "'");
@@ -412,7 +430,7 @@ public class ResultFetcher {
         }
     }
 
-    protected ResultEntry fetchSnippet(final URIMetadataRow page, final CacheStrategy cacheStrategy) {
+    protected ResultEntry fetchSnippet(final URIMetadataRow page, final String solrText, final CacheStrategy cacheStrategy) {
         // Snippet Fetching can has 3 modes:
         // 0 - do not fetch snippets
         // 1 - fetch snippets offline only
@@ -429,6 +447,7 @@ public class ResultFetcher {
         if (cacheStrategy == null) {
             final TextSnippet snippet = new TextSnippet(
                     null,
+                    solrText,
                     metadata,
                     this.snippetFetchWordHashes,
                     null,
@@ -445,6 +464,7 @@ public class ResultFetcher {
             startTime = System.currentTimeMillis();
             final TextSnippet snippet = new TextSnippet(
                     this.loader,
+                    solrText,
                     metadata,
                     this.snippetFetchWordHashes,
                     cacheStrategy,
diff --git a/source/de/anomic/search/Segment.java b/source/de/anomic/search/Segment.java
index 863264750..fbe5a962c 100644
--- a/source/de/anomic/search/Segment.java
+++ b/source/de/anomic/search/Segment.java
@@ -37,6 +37,7 @@ import java.util.TreeSet;
 import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
+import net.yacy.cora.services.federated.solr.SolrConnector;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -81,6 +82,7 @@ public class Segment {
     protected final IndexCell<WordReference>       termIndex;
     //private   final IndexCell<NavigationReference> authorNavIndex;
     protected final MetadataRepository             urlMetadata;
+    private         SolrConnector                  solr;
     private   final File                           segmentPath;
 
     public Segment(
@@ -98,6 +100,7 @@ public class Segment {
 
         this.log = log;
         this.segmentPath = segmentPath;
+        this.solr = null;
 
         this.termIndex = new IndexCell<WordReference>(
                 segmentPath,
@@ -126,6 +129,14 @@ public class Segment {
         this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727);
     }
 
+    public void connectSolr(final SolrConnector solr) {
+        this.solr = solr;
+    }
+
+    public SolrConnector getSolr() {
+        return this.solr;
+    }
+
     public static void migrateTextIndex(final File oldSegmentPath, final File newSegmentPath) {
         final File oldCellPath = new File(oldSegmentPath, "RICELL");
         if (!oldCellPath.exists()) return;
@@ -254,6 +265,7 @@ public class Segment {
     public void close() {
         this.termIndex.close();
         this.urlMetadata.close();
+        if (this.solr != null) this.solr.close();
     }
 
     public URIMetadataRow storeDocument(
diff --git a/source/de/anomic/search/Segments.java b/source/de/anomic/search/Segments.java
index f80b25ae1..dc4f5e896 100644
--- a/source/de/anomic/search/Segments.java
+++ b/source/de/anomic/search/Segments.java
@@ -38,13 +38,13 @@ import net.yacy.kelondro.rwi.IndexCell;
 
 
 public class Segments implements Iterable<Segment> {
-    
+
     /**
      * process enumeration type
      * defines constants that can be used to assign process-related segment names
      */
     public enum Process {
-        
+
         RECEIPTS,
         QUERIES,
         DHTIN,
@@ -59,7 +59,7 @@ public class Segments implements Iterable<Segment> {
             throw new UnsupportedOperationException("toString not allowed");
         }
     }
-    
+
     private final Log log;
     private final File segmentsPath;
     private final int entityCacheMaxSize;
@@ -68,7 +68,7 @@ public class Segments implements Iterable<Segment> {
     private final HashMap<Process, String> process_assignment;
     private final boolean useTailCache;
     private final boolean exceed134217727;
-    
+
     public Segments(
             final Log log,
             final File segmentsPath,
@@ -96,41 +96,41 @@ public class Segments implements Iterable<Segment> {
         this.process_assignment.put(Process.PUBLIC,         "default");
         this.process_assignment.put(Process.SURROGATES,     "default");
     }
-    
-    public void setSegment(Process process, String segmentName) {
+
+    public void setSegment(final Process process, final String segmentName) {
         this.process_assignment.put(process, segmentName);
     }
-    
-    public static void migrateOld(File oldSingleSegment, File newSegmentsPath, String newSegmentName) {
+
+    public static void migrateOld(final File oldSingleSegment, final File newSegmentsPath, final String newSegmentName) {
         if (!oldSingleSegment.exists()) return;
-        File newSegmentPath = new File(newSegmentsPath, newSegmentName);
+        final File newSegmentPath = new File(newSegmentsPath, newSegmentName);
         if (!newSegmentPath.exists()) newSegmentPath.mkdirs();
         Segment.migrateTextIndex(oldSingleSegment, newSegmentPath);
         Segment.migrateTextMetadata(oldSingleSegment, newSegmentPath);
-        
-        String[] oldFiles = oldSingleSegment.list();
-        for (String oldFile: oldFiles) {
+
+        final String[] oldFiles = oldSingleSegment.list();
+        for (final String oldFile: oldFiles) {
             if (oldFile.startsWith("text.")) {
                 new File(oldSingleSegment, oldFile).renameTo(new File(newSegmentPath, oldFile));
             }
         }
     }
-    
+
     public String[] segmentNames() {
         return this.segments.keySet().toArray(new String[this.segments.size()]);
     }
-    
+
     public boolean segmentExist(final String segmentName) {
-        return segments.containsKey(segmentName);
+        return this.segments.containsKey(segmentName);
     }
-    
+
     public Segment segment(final Process process) {
         return segment(this.process_assignment.get(process));
     }
-    
+
     public Segment segment(final String segmentName) {
-        if (segments == null) return null;
-        Segment segment = segments.get(segmentName);
+        if (this.segments == null) return null;
+        Segment segment = this.segments.get(segmentName);
         if (segment == null) {
             // generate the segment
             try {
@@ -141,7 +141,7 @@ public class Segments implements Iterable<Segment> {
                         this.maxFileSize,
                         this.useTailCache,
                         this.exceed134217727);
-            } catch (IOException e) {
+            } catch (final IOException e) {
                 Log.logException(e);
                 return null;
             }
@@ -149,28 +149,28 @@ public class Segments implements Iterable<Segment> {
         }
         return segment;
     }
-    
+
     public long URLCount() {
         if (this.segments == null) return 0;
         long c = 0;
-        for (Segment s: this.segments.values()) c += (long) s.urlMetadata().size();
+        for (final Segment s: this.segments.values()) c += s.urlMetadata().size();
         return c;
     }
-    
+
     public long RWICount() {
         if (this.segments == null) return 0;
         long c = 0;
-        for (Segment s: this.segments.values()) c += (long) s.termIndex().sizesMax();
+        for (final Segment s: this.segments.values()) c += s.termIndex().sizesMax();
         return c;
     }
-    
+
     public int RWIBufferCount() {
         if (this.segments == null) return 0;
         int c = 0;
-        for (Segment s: this.segments.values()) c += s.termIndex().getBufferSize();
+        for (final Segment s: this.segments.values()) c += s.termIndex().getBufferSize();
         return c;
     }
-    
+
     public MetadataRepository urlMetadata(final Process process) {
         return segment(this.process_assignment.get(process)).urlMetadata();
     }
@@ -178,11 +178,11 @@ public class Segments implements Iterable<Segment> {
     public IndexCell<WordReference> termIndex(final Process process) {
         return segment(this.process_assignment.get(process)).termIndex();
     }
-    
+
     public void clear(final Process process) {
         segment(this.process_assignment.get(process)).clear();
     }
-    
+
     public File getLocation(final Process process) {
         return segment(this.process_assignment.get(process)).getLocation();
     }
@@ -190,16 +190,16 @@ public class Segments implements Iterable<Segment> {
     public void close(final Process process) {
         segment(this.process_assignment.get(process)).close();
     }
-    
+
     public void close() {
-        if (segments != null) for (Segment s: this.segments.values()) s.close();
+        if (this.segments != null) for (final Segment s: this.segments.values()) s.close();
         this.segments = null;
     }
 
     public void finalize() {
         this.close();
     }
-    
+
     public synchronized Segment.ReferenceCleaner getReferenceCleaner(final String segmentName, final byte[] startHash) {
         return segment(segmentName).getReferenceCleaner(startHash);
     }
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 2a0d818e9..d1a6cfaf4 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -247,7 +247,6 @@ public final class Switchboard extends serverSwitch {
 
     private final Semaphore shutdownSync = new Semaphore(0);
     private boolean terminate = false;
-    public SolrChardingConnector solrConnector = null;
 
     //private Object  crawlingPausedSync = new Object();
     //private boolean crawlingIsPaused = false;
@@ -592,10 +591,10 @@ public final class Switchboard extends serverSwitch {
         final String solrurls = getConfig("federated.service.solr.indexing.url", "http://127.0.0.1:8983/solr");
         final boolean usesolr = getConfigBool("federated.service.solr.indexing.enabled", false) & solrurls.length() > 0;
         try {
-            this.solrConnector = (usesolr) ? new SolrChardingConnector(solrurls, workingScheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null;
+            this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr((usesolr) ? new SolrChardingConnector(solrurls, workingScheme, SolrChardingSelection.Method.MODULO_HOST_MD5) : null);
         } catch (final IOException e) {
             Log.logException(e);
-            this.solrConnector = null;
+            this.indexSegments.segment(Segments.Process.LOCALCRAWLING).connectSolr(null);
         }
 
         // start a loader
@@ -1314,7 +1313,6 @@ public final class Switchboard extends serverSwitch {
         Cache.close();
         this.tables.close();
         Domains.close();
-        if (this.solrConnector != null && getConfigBool("federated.service.solr.indexing.enabled", false)) this.solrConnector.close();
         AccessTracker.dumpLog(new File("DATA/LOG/queries.log"));
         UPnP.deletePortMapping();
         Tray.removeTray();
@@ -1989,7 +1987,7 @@ public final class Switchboard extends serverSwitch {
 
     public indexingQueueEntry condenseDocument(final indexingQueueEntry in) {
         in.queueEntry.updateStatus(Response.QUEUE_STATE_CONDENSING);
-        if (this.solrConnector != null && getConfigBool("federated.service.solr.indexing.enabled", false)/*in.queueEntry.profile().pushSolr()*/) {
+        if (this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr() != null && getConfigBool("federated.service.solr.indexing.enabled", false)/*in.queueEntry.profile().pushSolr()*/) {
             // send the documents to solr
             for (final Document doc: in.documents) {
                 try {
@@ -2000,7 +1998,7 @@ public final class Switchboard extends serverSwitch {
                         // in case that this happens it appears that the doc id is the right one
                     }
                     try {
-                        this.solrConnector.add(id, in.queueEntry.getResponseHeader(), doc);
+                        this.indexSegments.segment(Segments.Process.LOCALCRAWLING).getSolr().add(id, in.queueEntry.getResponseHeader(), doc);
                     } catch (final IOException e) {
                         Log.logWarning("SOLR", "failed to send " + in.queueEntry.url().toNormalform(true, false) + " to solr: " + e.getMessage());
                     }
diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java
index 77f7a9e77..5ed50cbef 100644
--- a/source/de/anomic/search/TextSnippet.java
+++ b/source/de/anomic/search/TextSnippet.java
@@ -24,6 +24,7 @@
 
 package de.anomic.search;
 
+import java.io.ByteArrayInputStream;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -34,6 +35,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import net.yacy.cora.document.ASCII;
+import net.yacy.cora.document.UTF8;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.cora.storage.ARC;
 import net.yacy.cora.storage.ConcurrentARC;
@@ -140,6 +142,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
 
     public TextSnippet(
             final LoaderDispatcher loader,
+            final String solrText,
             final URIMetadataRow.Components comp,
             final HandleSet queryhashes,
             final CacheStrategy cacheStrategy,
@@ -156,7 +159,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
         }
 
         // try to get snippet from snippetCache
-        ResultClass source = ResultClass.SOURCE_CACHE;
+        final ResultClass source = ResultClass.SOURCE_CACHE;
         final String wordhashes = yacySearch.set2string(queryhashes);
         final String urls = ASCII.String(url.hash());
         String snippetLine = snippetsCache.get(wordhashes, urls);
@@ -165,32 +168,37 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
             init(url.hash(), snippetLine, source, null);
             return;
         }
-        
+
+        // try to get the snippet from a document in the cache (or from the web)
+        // this requires that the document is parsed after loading
         String textline = null;
         HandleSet remainingHashes = queryhashes;
         { //encapsulate potential expensive sentences
-	        final Collection<StringBuilder> sentences;
-	        { //encapsulate potential expensive document 
-		        final Document document = loadDocument(loader, comp, queryhashes, cacheStrategy, url, reindexing, source);
-		        if (document == null) {
-		            return;
-		        }
-		
-		        /* ===========================================================================
-		         * COMPUTE SNIPPET
-		         * =========================================================================== */
-		        // we have found a parseable non-empty file: use the lines
-		
-		        // compute snippet from text
-		        sentences = document.getSentences(pre);
-		        document.close();
-	        } //encapsulate potential expensive document END
-	        
-	        if (sentences == null) {
-	            init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
-	            return;
-	        }
-	        
+	        Collection<StringBuilder> sentences = null;
+
+	        // try the solr text first
+	        if (solrText != null) {
+                // compute sentences from solr query
+                sentences = Document.getSentences(pre, new ByteArrayInputStream(UTF8.getBytes(solrText)));
+            }
+
+            // if that yields no sentences, fall back to the content of the re-loaded document
+            if (sentences == null) {
+    	        final Document document = loadDocument(loader, comp, queryhashes, cacheStrategy, url, reindexing, source);
+    	        if (document == null) {
+    	            return;
+    	        }
+
+    	        // compute sentences from parsed document
+    	        sentences = document.getSentences(pre);
+    	        document.close();
+
+                if (sentences == null) {
+                    init(url.hash(), null, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences");
+                    return;
+                }
+            }
+
 	        try {
 	        	final SnippetExtractor tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
 	            textline = tsr.getSnippet();
@@ -227,7 +235,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
 //        document.close();
         init(url.hash(), snippetLine, source, null);
     }
-    
+
     private Document loadDocument(
     		final LoaderDispatcher loader,
     		final URIMetadataRow.Components comp,
diff --git a/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java b/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
index 3f39ac2a7..e732e69f3 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrChardingConnector.java
@@ -34,14 +34,13 @@ import net.yacy.cora.protocol.Domains;
 import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.document.Document;
 import net.yacy.kelondro.data.meta.DigestURI;
-import net.yacy.kelondro.logging.Log;
 
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrInputDocument;
 
 
-public class SolrChardingConnector {
+public class SolrChardingConnector implements SolrConnector {
 
     private final List<SolrSingleConnector> connectors;
     private final SolrScheme scheme;
@@ -164,13 +163,7 @@ public class SolrChardingConnector {
         final long[] size = new long[this.connectors.size()];
         int i = 0;
         for (final SolrSingleConnector connector: this.connectors) {
-            try {
-                final SolrDocumentList list = connector.get("*:*", 0, 1);
-                size[i++] = list.getNumFound();
-            } catch (final Exception e) {
-                Log.logException(e);
-                size[i++] = 0;
-            }
+            size[i++] = connector.getSize();
         }
         return size;
     }
diff --git a/source/net/yacy/cora/services/federated/solr/SolrConnector.java b/source/net/yacy/cora/services/federated/solr/SolrConnector.java
new file mode 100644
index 000000000..c20693ba2
--- /dev/null
+++ b/source/net/yacy/cora/services/federated/solr/SolrConnector.java
@@ -0,0 +1,99 @@
+/**
+ *  SolrConnector
+ *  Copyright 2011 by Michael Peter Christen
+ *  First released 13.09.2011 at http://yacy.net
+ *
+ *  $LastChangedDate: 2011-04-14 22:05:04 +0200 (Do, 14 Apr 2011) $
+ *  $LastChangedRevision: 7654 $
+ *  $LastChangedBy: orbiter $
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.cora.services.federated.solr;
+
+import java.io.IOException;
+import java.util.List;
+
+import net.yacy.cora.protocol.ResponseHeader;
+import net.yacy.document.Document;
+import net.yacy.kelondro.data.meta.DigestURI;
+
+import org.apache.solr.common.SolrDocumentList;
+
+public interface SolrConnector { // implemented by SolrSingleConnector and SolrChardingConnector
+
+    /**
+     * With a scheme the fields of a SolrDocument can be translated to actual data values.
+     * @return the solr scheme that can translate the SolrDocument
+     */
+    public SolrScheme getScheme();
+
+    public void close(); // NOTE(review): presumably releases the underlying connection(s) -- confirm in the implementations
+
+    /**
+     * Delete everything in the solr index.
+     * @throws IOException
+     */
+    public void clear() throws IOException;
+
+    /**
+     * Delete an entry from solr.
+     * @param id the url hash of the entry
+     * @throws IOException
+     */
+    public void delete(final String id) throws IOException;
+
+    /**
+     * Delete a set of entries from solr; entries are identified by their url hash.
+     * @param ids a list of url hashes
+     * @throws IOException
+     */
+    public void delete(final List<String> ids) throws IOException;
+
+    /**
+     * Add a YaCy document. This calls the scheme processor to add the document as solr document.
+     * @param id the url hash of the entry
+     * @param header the http response header
+     * @param doc the YaCy document
+     * @throws IOException
+     */
+    public void add(final String id, final ResponseHeader header, final Document doc) throws IOException;
+
+    /**
+     * Register an entry as error document.
+     * @param digestURI the url of the entry (NOTE(review): presumably the url that could not be processed -- confirm)
+     * @param failReason a textual description of the failure
+     * @param httpstatus the http status code that belongs to the failure
+     * @throws IOException
+     */
+    public void err(final DigestURI digestURI, final String failReason, final int httpstatus) throws IOException;
+
+
+    /**
+     * Get a query result from solr; to get all results set the query string to "*:*".
+     * NOTE(review): offset and count presumably select the window of returned documents -- confirm
+     * @param querystring the solr query string
+     * @throws IOException
+     */
+    public SolrDocumentList get(final String querystring, final int offset, final int count) throws IOException;
+
+    /**
+     * Get the size of the index.
+     * @return number of results if solr is queried with a catch-all pattern
+     */
+    public long getSize();
+
+}
diff --git a/source/net/yacy/cora/services/federated/solr/SolrScheme.java b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
index ab8909d0b..b191f24ec 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrScheme.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrScheme.java
@@ -27,6 +27,8 @@ package net.yacy.cora.services.federated.solr;
 
 import java.io.File;
 import java.net.InetAddress;
+import java.net.MalformedURLException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Date;
 import java.util.Map;
@@ -44,6 +46,7 @@ import net.yacy.document.parser.html.ContentScraper;
 import net.yacy.document.parser.html.ImageEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
 
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrInputDocument;
 
 public class SolrScheme extends ConfigurationSet {
@@ -349,6 +352,46 @@ public class SolrScheme extends ConfigurationSet {
         return solrdoc;
     }
 
+    public String solrGetID(final SolrDocument solr) { // reads the "id" field of the given solr document
+        return (String) solr.getFieldValue("id"); // the cast fails if the stored value is not a String
+    }
+
+    public DigestURI solrGetURL(final SolrDocument solr) { // builds a DigestURI from the "sku" field of the document
+        try {
+            return new DigestURI((String) solr.getFieldValue("sku"));
+        } catch (final MalformedURLException e) {
+            return null; // a malformed url is reported to the caller as null; the exception is not logged
+        }
+    }
+
+    public String solrGetTitle(final SolrDocument solr) { // reads the "title" field of the given solr document
+        return (String) solr.getFieldValue("title"); // the cast fails if the stored value is not a String
+    }
+
+    public String solrGetText(final SolrDocument solr) { // reads the "text_t" field of the given solr document
+        return (String) solr.getFieldValue("text_t"); // the cast fails if the stored value is not a String
+    }
+
+    public String solrGetAuthor(final SolrDocument solr) { // reads the "author" field of the given solr document
+        return (String) solr.getFieldValue("author"); // the cast fails if the stored value is not a String
+    }
+
+    public String solrGetDescription(final SolrDocument solr) { // reads the "description" field of the given solr document
+        return (String) solr.getFieldValue("description"); // the cast fails if the stored value is not a String
+    }
+
+    public Date solrGetDate(final SolrDocument solr) { // reads the "last_modified" field of the given solr document
+        return (Date) solr.getFieldValue("last_modified"); // the cast fails if the stored value is not a java.util.Date
+    }
+
+    /** @return the values of the "keywords" field cast to String; an empty collection if the document has no such field */
+    public Collection<String> solrGetKeywords(final SolrDocument solr) {
+        final ArrayList<String> a = new ArrayList<String>();
+        final Collection<Object> c = solr.getFieldValues("keywords");
+        if (c == null) return a; // SolrDocument.getFieldValues returns null for a missing field; avoid the NPE in the loop
+        for (final Object s: c) a.add((String) s);
+        return a;
+    }
 
     /*
      * standard solr scheme
diff --git a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
index e75ee0fd5..798c09322 100644
--- a/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
+++ b/source/net/yacy/cora/services/federated/solr/SolrSingleConnector.java
@@ -57,7 +57,7 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 
 
-public class SolrSingleConnector {
+public class SolrSingleConnector implements SolrConnector {
 
     private final String solrurl, host, solrpath, solraccount, solrpw;
     private final int port;
@@ -178,6 +178,22 @@ public class SolrSingleConnector {
         }
     }
 
+    @Override
+    public SolrScheme getScheme() { // exposes the scheme object held in this connector's 'scheme' field
+        return this.scheme;
+    }
+
+    @Override
+    public long getSize() {
+        long numFound = 0; // reported when the query fails
+        try {
+            numFound = get("*:*", 0, 1).getNumFound(); // catch-all query; only the hit count of the result is used
+        } catch (final Exception e) {
+            Log.logException(e);
+        }
+        return numFound;
+    }
+
     /**
      * delete everything in the solr index
      * @throws IOException
@@ -325,6 +341,16 @@ public class SolrSingleConnector {
         //return result;
     }
 
+
+    public String getAdminInterface() { // url of the solr admin page, with a local host name replaced by the public local IP
+        final InetAddress publicAddress = Domains.myPublicLocalIP();
+        final String externHost = (publicAddress != null) ? publicAddress.getHostAddress() : "127.0.0.1";
+        String adminURL = this.solrurl;
+        int pos = adminURL.indexOf("localhost"); if (pos < 0) pos = adminURL.indexOf("127.0.0.1"); // both literals are 9 characters long
+        if (pos >= 0) adminURL = adminURL.substring(0, pos) + externHost + adminURL.substring(pos + 9);
+        if (adminURL.endsWith("/")) return adminURL + "admin/"; else return adminURL + "/admin/";
+    }
+
     public static void main(final String args[]) {
         SolrSingleConnector solr;
         try {
@@ -347,5 +373,4 @@ public class SolrSingleConnector {
             e.printStackTrace();
         }
     }
-
 }
diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java
index 666d64358..56c93bb56 100644
--- a/source/net/yacy/document/Document.java
+++ b/source/net/yacy/document/Document.java
@@ -312,8 +312,12 @@ dc_rights
     }
 
     public List<StringBuilder> getSentences(final boolean pre) {
-        if (this.text == null) return null;
-        final SentenceReader e = new SentenceReader(getText());
+        return getSentences(pre, getText()); // delegates to the static variant, which returns null for a null stream
+    }
+
+    public static List<StringBuilder> getSentences(final boolean pre, final InputStream text) {
+        if (text == null) return null;
+        final SentenceReader e = new SentenceReader(text);
         e.pre(pre);
         final List<StringBuilder> sentences = new ArrayList<StringBuilder>();
         while (e.hasNext()) {