Mirror of https://github.com/yacy/yacy_search_server.git, synced 2024-09-19 00:01:41 +02:00
Moved the Solr index-add method into the same method where the YaCy index is
written. Also did some code cleanup.
parent 315d83cfa0
commit 6f1ddb2519
@@ -137,7 +137,7 @@ public class IndexFederated_p {
         }
 
         // read index scheme table flags
-        final Iterator<ConfigurationSet.Entry> i = sb.solrScheme.entryIterator();
+        final Iterator<ConfigurationSet.Entry> i = sb.index.getSolrScheme().entryIterator();
         ConfigurationSet.Entry entry;
         boolean modified = false; // flag to remember changes
         while (i.hasNext()) {
@@ -160,7 +160,7 @@ public class IndexFederated_p {
         }
         if (modified) { // save settings to config file if modified
             try {
-                sb.solrScheme.commit();
+                sb.index.getSolrScheme().commit();
                 modified = false;
             } catch (IOException ex) {}
         }
@@ -191,7 +191,7 @@ public class IndexFederated_p {
         // use enum SolrField to keep defined order
         for(SolrField field : SolrField.values()) {
             prop.put("scheme_" + c + "_dark", dark ? 1 : 0); dark = !dark;
-            prop.put("scheme_" + c + "_checked", sb.solrScheme.contains(field.name()) ? 1 : 0);
+            prop.put("scheme_" + c + "_checked", sb.index.getSolrScheme().contains(field.name()) ? 1 : 0);
             prop.putHTML("scheme_" + c + "_key", field.name());
             prop.putHTML("scheme_" + c + "_solrfieldname",field.name().equalsIgnoreCase(field.getSolrFieldName()) ? "" : field.getSolrFieldName());
             if (field.getComment() != null) prop.putHTML("scheme_" + c + "_comment",field.getComment());
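
A minimal sketch of the access-path change above, assuming a running Switchboard instance: the scheme is no longer a Switchboard field but is owned by the index Segment.

    import net.yacy.search.Switchboard;
    import net.yacy.search.index.SolrConfiguration;

    // sketch only: fetch the working Solr scheme through the Segment
    public class SchemeAccessSketch {
        static SolrConfiguration scheme(final Switchboard sb) {
            return sb.index.getSolrScheme(); // replaces the removed sb.solrScheme field
        }
    }
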
@@ -24,6 +24,7 @@
 
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.search.Switchboard;
+import net.yacy.search.index.SolrConfiguration;
 import net.yacy.search.index.SolrField;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -37,8 +38,9 @@ public class schema_p {
 
         // write scheme
         int c = 0;
+        SolrConfiguration solrScheme = sb.index.getSolrScheme();
         for (SolrField field : SolrField.values()) {
-            if (sb.solrScheme.contains(field.name())) {
+            if (solrScheme.contains(field.name())) {
                 prop.put("fields_" + c + "_solrname", field.getSolrFieldName());
                 prop.put("fields_" + c + "_type", field.getType().printName());
                 prop.put("fields_" + c + "_comment", field.getComment());
@@ -81,8 +81,8 @@ public class CrawlQueues {
         this.log.logConfig("Starting Crawling Management");
         this.noticeURL = new NoticedURL(queuePath, sb.peers.myBotIDs(), sb.useTailCache, sb.exceed134217727);
         FileUtils.deletedelete(new File(queuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(sb.index.getSolr(), sb.solrScheme, queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
-        this.delegatedURL = new ZURL(sb.index.getSolr(), sb.solrScheme, queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
+        this.errorURL = new ZURL(sb.index.getSolr(), sb.index.getSolrScheme(), queuePath, ERROR_DB_FILENAME, false, sb.useTailCache, sb.exceed134217727);
+        this.delegatedURL = new ZURL(sb.index.getSolr(), sb.index.getSolrScheme(), queuePath, DELEGATED_DB_FILENAME, true, sb.useTailCache, sb.exceed134217727);
     }
 
     public void relocate(final File newQueuePath) {
@@ -93,8 +93,8 @@ public class CrawlQueues {
 
         this.noticeURL = new NoticedURL(newQueuePath, this.sb.peers.myBotIDs(), this.sb.useTailCache, this.sb.exceed134217727);
         FileUtils.deletedelete(new File(newQueuePath, ERROR_DB_FILENAME));
-        this.errorURL = new ZURL(this.sb.index.getSolr(), this.sb.solrScheme, newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
-        this.delegatedURL = new ZURL(this.sb.index.getSolr(), this.sb.solrScheme, newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
+        this.errorURL = new ZURL(this.sb.index.getSolr(), this.sb.index.getSolrScheme(), newQueuePath, ERROR_DB_FILENAME, false, this.sb.useTailCache, this.sb.exceed134217727);
+        this.delegatedURL = new ZURL(this.sb.index.getSolr(), this.sb.index.getSolrScheme(), newQueuePath, DELEGATED_DB_FILENAME, true, this.sb.useTailCache, this.sb.exceed134217727);
     }
 
     public synchronized void close() {
@@ -159,4 +159,12 @@ public class ResponseHeader extends HeaderFramework {
         }
         return Charset.forName(charSetName);
     }
+
+    public String getXRobotsTag() {
+        String x_robots_tag = this.get(HeaderFramework.X_ROBOTS_TAG, "");
+        if (x_robots_tag.isEmpty()) {
+            x_robots_tag = this.get(HeaderFramework.X_ROBOTS, "");
+        }
+        return x_robots_tag;
+    }
 }
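
A small usage sketch for the new accessor, assuming 'header' comes from a fetched HTTP response; the method hides the two header variants (X-Robots-Tag and the older X-Robots).

    import net.yacy.cora.protocol.ResponseHeader;

    // sketch only: read robots directives from either header variant
    public class XRobotsSketch {
        static boolean noindex(final ResponseHeader header) {
            return header.getXRobotsTag().indexOf("noindex", 0) >= 0;
        }
    }
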
@@ -786,7 +786,8 @@ public final class Protocol
             // store remote result to local result container
             // insert one container into the search result buffer
             // one is enough, only the references are used, not the word
-            containerCache.add(container.get(0), false, target.getName() + "/" + target.hash, result.joincount, true, time);
+            containerCache.add(container.get(0), false, target.getName() + "/" + target.hash, result.joincount, time);
+            containerCache.addFinalize();
             containerCache.addExpectedRemoteReferences(-count);
 
             // insert the containers to the index
source/net/yacy/search/IndexingQueueEntry.java (new file, 41 lines)
@@ -0,0 +1,41 @@
+/**
+ *  IndexingQueueEntry
+ *  Copyright 2012 by Michael Peter Christen
+ *  First released 24.07.2012 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+package net.yacy.search;
+
+import net.yacy.document.Condenser;
+import net.yacy.document.Document;
+import net.yacy.kelondro.workflow.WorkflowJob;
+import de.anomic.crawler.retrieval.Response;
+
+public class IndexingQueueEntry extends WorkflowJob {
+
+    public Response queueEntry;
+    public Document[] documents;
+    public Condenser[] condenser;
+
+    public IndexingQueueEntry(final Response queueEntry, final Document[] documents, final Condenser[] condenser) {
+        super();
+        this.queueEntry = queueEntry;
+        this.documents = documents;
+        this.condenser = condenser;
+    }
+}
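
A hedged usage sketch: an entry typically enters the workflow holding only the crawler Response; the documents and condensers are filled in by the later parsing and condensing stages.

    import de.anomic.crawler.retrieval.Response;
    import net.yacy.search.IndexingQueueEntry;

    // sketch only: a fresh entry before parsing/condensing has run
    public class QueueEntrySketch {
        static IndexingQueueEntry fresh(final Response response) {
            return new IndexingQueueEntry(response, null, null);
        }
    }
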
source/net/yacy/search/Shutdown.java (new file, 47 lines)
@@ -0,0 +1,47 @@
+/**
+ *  Shutdown
+ *  Copyright 2012 by Michael Peter Christen
+ *  First released 24.07.2012 at http://yacy.net
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program in the file lgpl21.txt
+ *  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package net.yacy.search;
+
+import net.yacy.kelondro.logging.Log;
+
+public class Shutdown extends Thread {
+    private final Switchboard sb;
+    private final long delay;
+    private final String reason;
+
+    public Shutdown(final Switchboard sb, final long delay, final String reason) {
+        this.sb = sb;
+        this.delay = delay;
+        this.reason = reason;
+    }
+
+    @Override
+    public void run() {
+        try {
+            Thread.sleep(this.delay);
+        } catch ( final InterruptedException e ) {
+            this.sb.getLog().logInfo("interrupted delayed shutdown");
+        } catch ( final Exception e ) {
+            Log.logException(e);
+        }
+        this.sb.terminate(this.reason);
+    }
+}
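
A usage sketch, assuming 'sb' is the running Switchboard: because Shutdown is a Thread, the caller returns immediately and termination happens after the delay.

    import net.yacy.search.Shutdown;
    import net.yacy.search.Switchboard;

    // sketch only: delayed, non-blocking shutdown
    public class ShutdownSketch {
        static void shutdownIn(final Switchboard sb, final long millis, final String reason) {
            new Shutdown(sb, millis, reason).start();
        }
    }
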
@@ -97,7 +97,6 @@ import net.yacy.cora.protocol.http.ProxySettings;
 import net.yacy.cora.services.federated.solr.ShardSelection;
 import net.yacy.cora.services.federated.solr.ShardSolrConnector;
 import net.yacy.cora.services.federated.solr.SolrConnector;
-import net.yacy.cora.services.federated.solr.SolrDoc;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -251,7 +250,6 @@ public final class Switchboard extends serverSwitch
     public SeedDB peers;
     public WorkTables tables;
     public Tray tray;
-    public SolrConfiguration solrScheme;
 
     public WorkflowProcessor<IndexingQueueEntry> indexingDocumentProcessor;
     public WorkflowProcessor<IndexingQueueEntry> indexingCondensementProcessor;
@@ -376,16 +374,6 @@ public final class Switchboard extends serverSwitch
         this.networkRoot.mkdirs();
         this.queuesRoot.mkdirs();
 
-        // initialize index
-        ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
-        final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
-        this.index = new Segment(this.log, new File(segmentsPath, "default"));
-        final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_URLDB, true)) this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
-        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_SOLR, true)) this.index.connectLocalSolr(connectWithinMs);
-
         // prepare a solr index profile switch list
         final File solrBackupProfile = new File("defaults/solr.keys.list");
         final String schemename = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_SCHEMEFILE, "solr.keys.default.list");
@@ -395,11 +383,21 @@ public final class Switchboard extends serverSwitch
         }
         final boolean solrlazy = getConfigBool(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_LAZY, true);
         final SolrConfiguration backupScheme = new SolrConfiguration(solrBackupProfile, solrlazy);
-        this.solrScheme = new SolrConfiguration(solrWorkProfile, solrlazy);
-
+        final SolrConfiguration solrScheme = new SolrConfiguration(solrWorkProfile, solrlazy);
         // update the working scheme with the backup scheme. This is necessary to include new features.
         // new features are always activated by default (if activated in input-backupScheme)
-        this.solrScheme.fill(backupScheme, true);
+        solrScheme.fill(backupScheme, true);
+
+        // initialize index
+        ReferenceContainer.maxReferences = getConfigInt("index.maxReferences", 0);
+        final File segmentsPath = new File(new File(indexPath, networkName), "SEGMENTS");
+        this.index = new Segment(this.log, new File(segmentsPath, "default"), solrScheme);
+        final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_URLDB, true)) this.index.connectUrlDb(this.useTailCache, this.exceed134217727);
+        if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_SOLR, true)) this.index.connectLocalSolr(connectWithinMs);
+
 
         // set up the solr interface
         final String solrurls = getConfig(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_URL, "http://127.0.0.1:8983/solr");
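
A sketch of the dependency that forced this reordering (file locations here are placeholders): the working scheme must exist before the Segment is constructed, because the Segment now keeps it as a final field.

    import java.io.File;
    import net.yacy.kelondro.logging.Log;
    import net.yacy.search.index.Segment;
    import net.yacy.search.index.SolrConfiguration;

    // sketch only: build the scheme first, then the Segment that owns it
    public class InitOrderSketch {
        static Segment init(final Log log, final File segmentDir, final File workProfile, final File backupProfile) {
            final SolrConfiguration backup = new SolrConfiguration(backupProfile, true);
            final SolrConfiguration scheme = new SolrConfiguration(workProfile, true);
            scheme.fill(backup, true); // pull newly introduced fields from the shipped defaults
            return new Segment(log, segmentDir, scheme);
        }
    }
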
@@ -1133,6 +1131,9 @@ public final class Switchboard extends serverSwitch
         // switch the networks
         synchronized ( this ) {
 
+            // remember the solr scheme
+            SolrConfiguration solrScheme = this.index.getSolrScheme();
+
             // shut down
             this.crawler.close();
             if ( this.dhtDispatcher != null ) {
@@ -1179,7 +1180,7 @@ public final class Switchboard extends serverSwitch
                 partitionExponent,
                 this.useTailCache,
                 this.exceed134217727);
-            this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"));
+            this.index = new Segment(this.log, new File(new File(new File(indexPrimaryPath, networkName), "SEGMENTS"), "default"), solrScheme);
             final int connectWithinMs = this.getConfigInt(SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_COMMITWITHINMS, 180000);
             if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) this.index.connectRWI(wordCacheMaxCount, fileSizeMax);
             if (this.getConfigBool(SwitchboardConstants.CORE_SERVICE_CITATION, true)) this.index.connectCitation(wordCacheMaxCount, fileSizeMax);
@@ -2395,55 +2396,8 @@ public final class Switchboard extends serverSwitch
             return new IndexingQueueEntry(in.queueEntry, in.documents, null);
         }
 
-        boolean localSolr = this.index.connectedLocalSolr();
-        boolean remoteSolr = this.index.connectedRemoteSolr();
-        if (localSolr || remoteSolr) {
-            // send the documents to solr
-            for ( final Document doc : in.documents ) {
-                try {
-                    final String id = UTF8.String(new DigestURI(doc.dc_identifier()).hash());
-                    final String iquh = UTF8.String(in.queueEntry.url().hash());
-                    if ( !id.equals(iquh) ) {
-                        this.log.logWarning("condenseDocument consistency check doc="
-                            + id
-                            + ":"
-                            + doc.dc_identifier()
-                            + ", query="
-                            + iquh
-                            + ":"
-                            + in.queueEntry.url());
-                        // in case that this happens it appears that the doc id is the right one
-                    }
-                    try {
-                        SolrDoc solrDoc = this.solrScheme.yacy2solr(id, in.queueEntry.getResponseHeader(), doc);
-                        this.index.getSolr().add(solrDoc);
-                    } catch ( final IOException e ) {
-                        Log.logWarning(
-                            "SOLR",
-                            "failed to send "
-                                + in.queueEntry.url().toNormalform(true, false)
-                                + " to solr: "
-                                + e.getMessage());
-                    }
-                } catch ( final MalformedURLException e ) {
-                    Log.logException(e);
-                    continue;
-                }
-            }
-        }
-
-        // check if we should accept the document for our index
-        if (!this.getConfigBool(SwitchboardConstants.CORE_SERVICE_RWI, true)) {
-            if ( this.log.isInfo() ) {
-                this.log.logInfo("Not Condensed Resource '"
-                    + in.queueEntry.url().toNormalform(false, true)
-                    + "': indexing not wanted by federated rule for YaCy");
-            }
-            return new IndexingQueueEntry(in.queueEntry, in.documents, null);
-        }
+        final List<Document> doclist = new ArrayList<Document>();
 
         // check which files may take part in the indexing process
-        final List<Document> doclist = new ArrayList<Document>();
         for ( final Document document : in.documents ) {
             if ( document.indexingDenied() ) {
                 if ( this.log.isInfo() ) {
@@ -2569,6 +2523,7 @@ public final class Switchboard extends serverSwitch
             queueEntry.lastModified(),
             new Date(),
             queueEntry.size(),
+            queueEntry.getResponseHeader(),
             document,
             condenser,
             searchEvent,
@@ -73,9 +73,9 @@ public class DocumentIndex extends Segment
 
     static final ThreadGroup workerThreadGroup = new ThreadGroup("workerThreadGroup");
 
-    public DocumentIndex(final File segmentPath, final CallbackListener callback, final int cachesize)
+    public DocumentIndex(final File segmentPath, final File schemePath, final CallbackListener callback, final int cachesize)
             throws IOException {
-        super(new Log("DocumentIndex"), segmentPath);
+        super(new Log("DocumentIndex"), segmentPath, schemePath == null ? null : new SolrConfiguration(schemePath, true));
         super.connectRWI(cachesize, targetFileSize * 4 - 1);
         super.connectCitation(cachesize, targetFileSize * 4 - 1);
         super.connectUrlDb(
@@ -174,6 +174,7 @@ public class DocumentIndex extends Segment
             new Date(url.lastModified()),
             new Date(),
             url.length(),
+            null,
             document,
             condenser,
             null,
@@ -306,7 +307,7 @@ public class DocumentIndex extends Segment
         try {
             if ( args[1].equals("add") ) {
                 final DigestURI f = new DigestURI(args[2]);
-                final DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
+                final DocumentIndex di = new DocumentIndex(segmentPath, null, callback, 100000);
                 di.addConcurrent(f);
                 di.close();
             } else {
@@ -315,7 +316,7 @@ public class DocumentIndex extends Segment
                     query += args[i];
                 }
                 query.trim();
-                final DocumentIndex di = new DocumentIndex(segmentPath, callback, 100000);
+                final DocumentIndex di = new DocumentIndex(segmentPath, null, callback, 100000);
                 final ArrayList<DigestURI> results = di.find(query, 100);
                 for ( final DigestURI f : results ) {
                     if ( f != null ) {
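
A sketch of the widened constructor, assuming CallbackListener is the callback type used by the CLI code above: the new second argument is the optional scheme file, and null means "run without a Solr scheme".

    import java.io.File;
    import java.io.IOException;
    import net.yacy.search.index.DocumentIndex;

    // sketch only: open a document index without a Solr scheme
    public class DocumentIndexSketch {
        static DocumentIndex open(final File segmentPath, final DocumentIndex.CallbackListener callback) throws IOException {
            return new DocumentIndex(segmentPath, null /* schemePath */, callback, 100000);
        }
    }
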
@@ -39,7 +39,9 @@ import net.yacy.cora.document.ASCII;
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.document.UTF8;
 import net.yacy.cora.order.ByteOrder;
+import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.services.federated.solr.SolrConnector;
+import net.yacy.cora.services.federated.solr.SolrDoc;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -100,15 +102,16 @@ public class Segment {
 
     private final Log log;
     private final File segmentPath;
+    private final SolrConfiguration solrScheme;
     protected final MetadataRepository urlMetadata;
     protected IndexCell<WordReference> termIndex;
     protected IndexCell<CitationReference> urlCitationIndex;
 
-    public Segment(final Log log, final File segmentPath) {
-
+    public Segment(final Log log, final File segmentPath, final SolrConfiguration solrScheme) {
         log.logInfo("Initializing Segment '" + segmentPath + ".");
         this.log = log;
         this.segmentPath = segmentPath;
+        this.solrScheme = solrScheme;
 
         // create LURL-db
         this.urlMetadata = new MetadataRepository(segmentPath);
@@ -197,10 +200,15 @@ public class Segment {
     public void disconnectLocalSolr() {
         this.urlMetadata.disconnectLocalSolr();
     }
 
     public SolrConnector getSolr() {
         return this.urlMetadata.getSolr();
     }
 
+    public SolrConfiguration getSolrScheme() {
+        return this.solrScheme;
+    }
+
     public SolrConnector getRemoteSolr() {
         return this.urlMetadata.getRemoteSolr();
     }
@@ -318,94 +326,6 @@ public class Segment {
         return this.segmentPath;
     }
 
-    /**
-     * this is called by the switchboard to put in a new page into the index
-     * use all the words in one condenser object to simultanous create index entries
-     *
-     * @param url
-     * @param urlModified
-     * @param document
-     * @param condenser
-     * @param language
-     * @param doctype
-     * @param outlinksSame
-     * @param outlinksOther
-     * @return
-     */
-    private int addPageIndex(
-            final DigestURI url,
-            final Date urlModified,
-            final Document document,
-            final Condenser condenser,
-            final String language,
-            final char doctype,
-            final int outlinksSame,
-            final int outlinksOther,
-            final SearchEvent searchEvent,
-            final String sourceName) {
-        final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
-        int wordCount = 0;
-        final int urlLength = url.toNormalform(true, true).length();
-        final int urlComps = MultiProtocolURI.urlComps(url.toString()).length;
-
-        // iterate over all words of content text
-        final Iterator<Map.Entry<String, Word>> i = condenser.words().entrySet().iterator();
-        Map.Entry<String, Word> wentry;
-        String word;
-        final int len = (document == null) ? urlLength : document.dc_title().length();
-        final WordReferenceRow ientry = new WordReferenceRow(url.hash(),
-                urlLength, urlComps, len,
-                condenser.RESULT_NUMB_WORDS,
-                condenser.RESULT_NUMB_SENTENCES,
-                urlModified.getTime(),
-                System.currentTimeMillis(),
-                UTF8.getBytes(language),
-                doctype,
-                outlinksSame, outlinksOther);
-        Word wprop = null;
-        byte[] wordhash;
-        while (i.hasNext()) {
-            wentry = i.next();
-            word = wentry.getKey();
-            wprop = wentry.getValue();
-            assert (wprop.flags != null);
-            ientry.setWord(wprop);
-            wordhash = Word.word2hash(word);
-            if (this.termIndex != null) try {
-                this.termIndex.add(wordhash, ientry);
-            } catch (final Exception e) {
-                Log.logException(e);
-            }
-            wordCount++;
-
-            // during a search event it is possible that a heuristic is used which aquires index
-            // data during search-time. To transfer indexed data directly to the search process
-            // the following lines push the index data additionally to the search process
-            // this is done only for searched words
-            if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) {
-                // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
-                ReferenceContainer<WordReference> container;
-                try {
-                    container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
-                    container.add(ientry);
-                    rankingProcess.add(container, true, sourceName, -1, !i.hasNext(), 5000);
-                } catch (final RowSpaceExceededException e) {
-                    continue;
-                }
-            }
-        }
-
-        // assign the catchall word
-        ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics
-        if (this.termIndex != null) try {
-            this.termIndex.add(catchallHash, ientry);
-        } catch (final Exception e) {
-            Log.logException(e);
-        }
-
-        return wordCount;
-    }
-
     private int addCitationIndex(final DigestURI url, final Date urlModified, final Map<MultiProtocolURI, Properties> anchors) {
         if (anchors == null) return 0;
         int refCount = 0;
@@ -433,25 +353,12 @@ public class Segment {
         if (this.urlCitationIndex != null) this.urlCitationIndex.close();
     }
 
-    public URIMetadataRow storeDocument(
-            final DigestURI url,
-            final DigestURI referrerURL,
-            Date modDate,
-            final Date loadDate,
-            final long sourcesize,
-            final Document document,
-            final Condenser condenser,
-            final SearchEvent searchEvent,
-            final String sourceName
-            ) throws IOException {
-        final long startTime = System.currentTimeMillis();
-
-        // CREATE INDEX
-
-        // load some document metadata
-        final String dc_title = document.dc_title();
-
-        // do a identification of the language
+    private String votedLanguage(
+            final DigestURI url,
+            final String urlNormalform,
+            final Document document,
+            final Condenser condenser) {
+        // do a identification of the language
         String language = condenser.language(); // this is a statistical analysation of the content: will be compared with other attributes
         final String bymetadata = document.dc_language(); // the languageByMetadata may return null if there was no declaration
         if (language == null) {
@@ -466,7 +373,7 @@ public class Segment {
         else {
             final String error = "LANGUAGE-BY-STATISTICS: " + url + " CONFLICTING: " + language + " (the language given by the TLD is " + url.language() + ")";
             // see if we have a hint in the url that the statistic was right
-            final String u = url.toNormalform(true, false).toLowerCase();
+            final String u = urlNormalform.toLowerCase();
             if (!u.contains("/" + language + "/") && !u.contains("/" + ISO639.country(language).toLowerCase() + "/")) {
                 // no confirmation using the url, use the TLD
                 language = url.language();
@@ -491,9 +398,46 @@ public class Segment {
             }
         }
+        return language;
     }
 
-        // create a new loaded URL db entry
-        if (modDate.getTime() > loadDate.getTime()) modDate = loadDate;
+    public URIMetadataRow storeDocument(
+            final DigestURI url,
+            final DigestURI referrerURL,
+            Date modDate,
+            final Date loadDate,
+            final long sourcesize,
+            final ResponseHeader responseHeader,
+            final Document document,
+            final Condenser condenser,
+            final SearchEvent searchEvent,
+            final String sourceName
+            ) throws IOException {
+        final long startTime = System.currentTimeMillis();
+
+        // CREATE INDEX
+
+        // load some document metadata
+        final String id = ASCII.String(url.hash());
+        final String dc_title = document.dc_title();
+        final String urlNormalform = url.toNormalform(true, false);
+        final String language = votedLanguage(url, urlNormalform, document, condenser); // identification of the language
+
+        // STORE TO SOLR
+        boolean localSolr = this.connectedLocalSolr();
+        boolean remoteSolr = this.connectedRemoteSolr();
+        if (localSolr || remoteSolr) {
+            try {
+                SolrDoc solrDoc = this.solrScheme.yacy2solr(id, responseHeader, document);
+                this.getSolr().add(solrDoc);
+            } catch ( final IOException e ) {
+                Log.logWarning("SOLR", "failed to send " + urlNormalform + " to solr: " + e.getMessage());
+            }
+        }
+
+        // STORE URL TO LOADED-URL-DB
+        if (modDate.getTime() > loadDate.getTime()) modDate = loadDate; // TODO: compare with modTime from responseHeader
+        char docType = Response.docType(document.dc_format());
         final URIMetadataRow newEntry = new URIMetadataRow(
             url, // URL
             dc_title, // document description
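
A hedged call sketch for the consolidated entry point (variable names are illustrative): a null ResponseHeader is now legal, e.g. for local file imports, and the Solr document is then built with fallback values.

    import java.io.IOException;
    import java.util.Date;
    import net.yacy.document.Condenser;
    import net.yacy.document.Document;
    import net.yacy.kelondro.data.meta.DigestURI;
    import net.yacy.kelondro.data.meta.URIMetadataRow;
    import net.yacy.search.index.Segment;

    // sketch only: index a document that has no HTTP response header
    public class StoreSketch {
        static URIMetadataRow store(final Segment segment, final DigestURI url,
                final Document document, final Condenser condenser) throws IOException {
            return segment.storeDocument(url, null, new Date(), new Date(), document.getTextLength(),
                null /* no ResponseHeader */, document, condenser, null /* no SearchEvent */, "localImport");
        }
    }
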
@@ -509,7 +453,7 @@ public class Segment {
             new byte[0], // md5
             (int) sourcesize, // size
             condenser.RESULT_NUMB_WORDS, // word count
-            Response.docType(document.dc_format()), // doctype
+            docType, // doctype
             condenser.RESULT_FLAGS, // flags
             UTF8.getBytes(language), // language
             document.inboundLinks().size(), // inbound links
@@ -519,25 +463,72 @@ public class Segment {
             document.getVideolinks().size(), // lvideo
             document.getApplinks().size() // lapp
         );
 
-        // STORE URL TO LOADED-URL-DB
-        this.urlMetadata.store(newEntry); // TODO: should be serialized; integrated in IODispatcher
-
+        this.urlMetadata.store(newEntry);
         final long storageEndTime = System.currentTimeMillis();
 
         // STORE PAGE INDEX INTO WORD INDEX DB
-        final int words = addPageIndex(
-                url, // document url
-                modDate, // document mod date
-                document, // document content
-                condenser, // document condenser
-                language, // document language
-                Response.docType(document.dc_format()), // document type
-                document.inboundLinks().size(), // inbound links
-                document.outboundLinks().size(), // outbound links
-                searchEvent, // a search event that can have results directly
-                sourceName // the name of the source where the index was created
-        );
+        int outlinksSame = document.inboundLinks().size();
+        int outlinksOther = document.outboundLinks().size();
+        final RWIProcess rankingProcess = (searchEvent == null) ? null : searchEvent.getRankingResult();
+        int wordCount = 0;
+        final int urlLength = urlNormalform.length();
+        final int urlComps = MultiProtocolURI.urlComps(url.toString()).length;
+
+        // create a word prototype which is re-used for all entries
+        final int len = (document == null) ? urlLength : document.dc_title().length();
+        final WordReferenceRow ientry = new WordReferenceRow(
+                url.hash(),
+                urlLength, urlComps, len,
+                condenser.RESULT_NUMB_WORDS,
+                condenser.RESULT_NUMB_SENTENCES,
+                modDate.getTime(),
+                System.currentTimeMillis(),
+                UTF8.getBytes(language),
+                docType,
+                outlinksSame, outlinksOther);
+
+        // iterate over all words of content text
+        Word wprop = null;
+        byte[] wordhash;
+        String word;
+        for (Map.Entry<String, Word> wentry: condenser.words().entrySet()) {
+            word = wentry.getKey();
+            wprop = wentry.getValue();
+            assert (wprop.flags != null);
+            ientry.setWord(wprop);
+            wordhash = Word.word2hash(word);
+            if (this.termIndex != null) try {
+                this.termIndex.add(wordhash, ientry);
+            } catch (final Exception e) {
+                Log.logException(e);
+            }
+            wordCount++;
+
+            // during a search event it is possible that a heuristic is used which aquires index
+            // data during search-time. To transfer indexed data directly to the search process
+            // the following lines push the index data additionally to the search process
+            // this is done only for searched words
+            if (searchEvent != null && !searchEvent.getQuery().query_exclude_hashes.has(wordhash) && searchEvent.getQuery().query_include_hashes.has(wordhash)) {
+                // if the page was added in the context of a heuristic this shall ensure that findings will fire directly into the search result
+                ReferenceContainer<WordReference> container;
+                try {
+                    container = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, wordhash, 1);
+                    container.add(ientry);
+                    rankingProcess.add(container, true, sourceName, -1, 5000);
+                } catch (final RowSpaceExceededException e) {
+                    continue;
+                }
+            }
+        }
+        if (rankingProcess != null) rankingProcess.addFinalize();
+
+        // assign the catchall word
+        ientry.setWord(wprop == null ? catchallWord : wprop); // we use one of the word properties as template to get the document characteristics
+        if (this.termIndex != null) try {
+            this.termIndex.add(catchallHash, ientry);
+        } catch (final Exception e) {
+            Log.logException(e);
+        }
 
         // STORE PAGE REFERENCES INTO CITATION INDEX
         final int refs = addCitationIndex(url, modDate, document.getAnchors());
@@ -546,10 +537,8 @@ public class Segment {
         final long indexingEndTime = System.currentTimeMillis();
 
         if (this.log.isInfo()) {
-            // TODO: UTF-8 docDescription seems not to be displayed correctly because
-            // of string concatenation
-            this.log.logInfo("*Indexed " + words + " words in URL " + url +
-                    " [" + ASCII.String(url.hash()) + "]" +
+            this.log.logInfo("*Indexed " + wordCount + " words in URL " + url +
+                    " [" + id + "]" +
                     "\n\tDescription: " + dc_title +
                     "\n\tMimeType: " + document.dc_format() + " | Charset: " + document.getCharset() + " | " +
                     "Size: " + document.getTextLength() + " bytes | " +
@@ -106,7 +106,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
     protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String[] value) {
         if ((isEmpty() || contains(key.name())) && (!this.lazy || (value != null && value.length > 0))) solrdoc.addSolr(key, value);
     }
-
+
     protected void addSolr(final SolrDoc solrdoc, final SolrField key, final List<String> value) {
         if ((isEmpty() || contains(key.name())) && (!this.lazy || (value != null && !value.isEmpty()))) solrdoc.addSolr(key, value);
     }
@@ -163,7 +163,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
         addSolr(solrdoc, SolrField.author, yacydoc.dc_creator());
         addSolr(solrdoc, SolrField.description, yacydoc.dc_description());
         addSolr(solrdoc, SolrField.content_type, yacydoc.dc_format());
-        addSolr(solrdoc, SolrField.last_modified, header.lastModified());
+        addSolr(solrdoc, SolrField.last_modified, header == null ? new Date() : header.lastModified());
         addSolr(solrdoc, SolrField.keywords, yacydoc.dc_subject(' '));
         final String content = yacydoc.getTextString();
         addSolr(solrdoc, SolrField.text_t, content);
@@ -224,10 +224,14 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
             if (robots_meta.indexOf("noindex",0) >= 0) b += 4; // set bit 2
             if (robots_meta.indexOf("nofollow",0) >= 0) b += 8; // set bit 3
         }
-        String x_robots_tag = header.get(HeaderFramework.X_ROBOTS_TAG, "");
-        if (x_robots_tag.isEmpty()) {
-            x_robots_tag = header.get(HeaderFramework.X_ROBOTS, "");
-        } else {
+        String x_robots_tag = "";
+        if (header != null) {
+            x_robots_tag = header.get(HeaderFramework.X_ROBOTS_TAG, "");
+            if (x_robots_tag.isEmpty()) {
+                x_robots_tag = header.get(HeaderFramework.X_ROBOTS, "");
+            }
+        }
+        if (!x_robots_tag.isEmpty()) {
             // this tag may have values: noarchive, nosnippet, noindex, unavailable_after
             if (x_robots_tag.indexOf("noarchive",0) >= 0) b += 256; // set bit 8
             if (x_robots_tag.indexOf("nosnippet",0) >= 0) b += 512; // set bit 9
@@ -398,7 +402,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
         }
 
         // response time
-        addSolr(solrdoc, SolrField.responsetime_i, header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0"));
+        addSolr(solrdoc, SolrField.responsetime_i, header == null ? 0 : Integer.parseInt(header.get(HeaderFramework.RESPONSE_TIME_MILLIS, "0")));
         }
 
         // list all links
@@ -487,7 +491,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
             addSolr(solrdoc, SolrField.lon_coordinate, yacydoc.lon());
             addSolr(solrdoc, SolrField.lat_coordinate, yacydoc.lat());
         }
-        addSolr(solrdoc, SolrField.httpstatus_i, header.getStatusCode());
+        addSolr(solrdoc, SolrField.httpstatus_i, header == null ? 200 : header.getStatusCode());
 
         return solrdoc;
     }
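
A null-safety sketch matching the hedged calls above: with no HTTP header (e.g. a file import), the header-derived Solr fields fall back to defaults (current date, status 200, response time 0).

    import java.io.IOException;
    import net.yacy.cora.services.federated.solr.SolrDoc;
    import net.yacy.document.Document;
    import net.yacy.search.index.SolrConfiguration;

    // sketch only: build a Solr document without a ResponseHeader
    public class NullHeaderSketch {
        static SolrDoc toSolr(final SolrConfiguration scheme, final String id, final Document document) throws IOException {
            return scheme.yacy2solr(id, null /* no HTTP header */, document);
        }
    }
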
@@ -221,7 +221,8 @@ public final class RWIProcess extends Thread
                     System.currentTimeMillis() - timer),
                 false);
             if ( !index.isEmpty() ) {
-                add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, true, this.maxtime);
+                add(index, true, "local index: " + this.query.getSegment().getLocation(), -1, this.maxtime);
+                addFinalize();
             }
         } catch ( final Exception e ) {
             Log.logException(e);
@@ -230,12 +231,15 @@ public final class RWIProcess extends Thread
         }
     }
 
+    public void addFinalize() {
+        this.addRunning = false;
+    }
+
     public void add(
         final ReferenceContainer<WordReference> index,
         final boolean local,
         final String resourceName,
         final int fullResource,
-        final boolean finalizeAddAtEnd,
         final long maxtime) {
         // we collect the urlhashes and construct a list with urlEntry objects
         // attention: if minEntries is too high, this method will not terminate within the maxTime
@@ -422,10 +426,6 @@ public final class RWIProcess extends Thread
 
         } catch ( final InterruptedException e ) {
         } catch ( final RowSpaceExceededException e ) {
-        } finally {
-            if ( finalizeAddAtEnd ) {
-                this.addRunning = false;
-            }
         }
 
         //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
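
A sketch of the new feeder contract that replaces the removed finalizeAddAtEnd flag: add(...) may be called any number of times, and addFinalize() is called exactly once when no further containers will follow (as the Protocol and local-search call sites above now do).

    import net.yacy.kelondro.data.word.WordReference;
    import net.yacy.kelondro.rwi.ReferenceContainer;
    import net.yacy.search.query.RWIProcess;

    // sketch only: feed one container, then signal the end of feeding
    public class FeederSketch {
        static void feed(final RWIProcess proc, final ReferenceContainer<WordReference> container, final long maxtime) {
            proc.add(container, true, "example source", -1, maxtime); // fullResource = -1: unknown
            proc.addFinalize(); // no further add() calls will follow
        }
    }
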
@@ -503,7 +503,7 @@ public class SnippetProcess {
                 sd = sdl.get(0);
             }
             if (sd != null) {
-                solrContent = Switchboard.getSwitchboard().solrScheme.solrGetText(sd);
+                solrContent = Switchboard.getSwitchboard().index.getSolrScheme().solrGetText(sd);
             }
         }
 
@@ -666,7 +666,7 @@ public final class yacy {
         final int cacheMem = (int)(MemoryControl.maxMemory() - MemoryControl.total());
         if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
 
-        final Segment wordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"));
+        final Segment wordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), null);
         wordIndex.connectRWI(10000, Integer.MAX_VALUE);
         wordIndex.connectUrlDb(false, false);
         final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.termIndex().referenceContainerIterator("AAAAAAAAAAAA".getBytes(), false, false);
@@ -845,7 +845,7 @@ public final class yacy {
         try {
             Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
             if (resource.equals("all")) {
-                WordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"));
+                WordIndex = new Segment(log, new File(new File(indexPrimaryRoot, "freeworld"), "TEXT"), null);
                 WordIndex.connectRWI(10000, Integer.MAX_VALUE);
                 WordIndex.connectUrlDb(false, false);
                 indexContainerIterator = WordIndex.termIndex().referenceContainerIterator(wordChunkStartHash.getBytes(), false, false);