Added option to re-index exported XML snapshot dumps to HTCACHE/snapshots by just placing them in the SURROGATES/in path.
This commit is contained in:
Michael Peter Christen 2015-05-08 15:30:26 +02:00
parent 6f4fe4b175
commit ff29b0e503
4 changed files with 38 additions and 18 deletions

View File

@ -31,6 +31,8 @@ import java.util.Set;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField; import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.util.NamedList;
import net.yacy.cora.storage.Configuration; import net.yacy.cora.storage.Configuration;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
@ -87,6 +89,16 @@ public class SchemaConfiguration extends Configuration implements Serializable {
return sid; return sid;
} }
/**
 * Converts a multi-valued parameter map into a Solr input document.
 * Only parameters whose name is accepted by {@code this.contains(name)}
 * are copied; every other parameter is left out of the result.
 *
 * @param params the parameter map supplying field names and their values
 * @return a newly created document holding the accepted fields
 */
public SolrInputDocument toSolrInputDocument(final MultiMapSolrParams params) {
    final SolrInputDocument inputDoc = new SolrInputDocument();
    for (final String fieldName : params.getMap().keySet()) {
        // skip every field that is not enabled in the local Solr schema
        if (!this.contains(fieldName)) {
            continue;
        }
        final String[] fieldValues = params.getParams(fieldName);
        inputDoc.addField(fieldName, fieldValues);
    }
    return inputDoc;
}
public SolrDocument toSolrDocument(final SolrInputDocument doc, Set<String> omitFields) { public SolrDocument toSolrDocument(final SolrInputDocument doc, Set<String> omitFields) {
SolrDocument sd = new SolrDocument(); SolrDocument sd = new SolrDocument();
for (SolrInputField field: doc) { for (SolrInputField field: doc) {

View File

@ -127,6 +127,7 @@ public class DCEntry extends MultiMapSolrParams {
// <dc:identifier>http://hdl.handle.net/2104/8302</dc:identifier> // <dc:identifier>http://hdl.handle.net/2104/8302</dc:identifier>
// <dc:identifier>10.1051/0004-6361/201117940</dc:identifier> // <dc:identifier>10.1051/0004-6361/201117940</dc:identifier>
String u = this.get("url"); String u = this.get("url");
if (u == null) u = this.get("sku");
if (u == null) { if (u == null) {
final String[] urls = this.getParams("dc:identifier"); final String[] urls = this.getParams("dc:identifier");
@ -342,7 +343,7 @@ public class DCEntry extends MultiMapSolrParams {
null, null,
getDescriptions(), getDescriptions(),
getLon(), getLat(), getLon(), getLat(),
"", get("text_t", ""),
null, null,
null, null,
null, null,

View File

@ -140,6 +140,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
this.surrogate = new DCEntry(); this.surrogate = new DCEntry();
} else if ("element".equals(tag) || "str".equals(tag) || "int".equals(tag) || "bool".equals(tag) || "long".equals(tag)) { } else if ("element".equals(tag) || "str".equals(tag) || "int".equals(tag) || "bool".equals(tag) || "long".equals(tag)) {
this.elementName = atts.getValue("name"); this.elementName = atts.getValue("name");
this.parsingValue = true;
} else if ("value".equals(tag)) { } else if ("value".equals(tag)) {
this.buffer.setLength(0); this.buffer.setLength(0);
this.parsingValue = true; this.parsingValue = true;
@ -174,6 +175,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
if (this.elementName != null) { if (this.elementName != null) {
this.surrogate.getMap().put(this.elementName, new String[]{value}); this.surrogate.getMap().put(this.elementName, new String[]{value});
} }
this.buffer.setLength(0);
this.parsingValue = false; this.parsingValue = false;
} else if ("value".equals(tag)) { } else if ("value".equals(tag)) {
//System.out.println("BUFFER-SIZE=" + buffer.length()); //System.out.println("BUFFER-SIZE=" + buffer.length());

View File

@ -1942,6 +1942,7 @@ public final class Switchboard extends serverSwitch {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
} }
log.info("processed surrogate " + infile);
} }
} }
if (is != null) try {is.close();} catch (IOException e) {} if (is != null) try {is.close();} catch (IOException e) {}
@ -1969,23 +1970,27 @@ public final class Switchboard extends serverSwitch {
continue; continue;
} }
// create a queue entry if (surrogate.get("text_t") == null) {
final Document document = surrogate.document(); // create a queue entry
final Request request = final Document document = surrogate.document();
new Request( final Request request =
ASCII.getBytes(this.peers.mySeed().hash), new Request(
surrogate.getIdentifier(true), ASCII.getBytes(this.peers.mySeed().hash),
null, surrogate.getIdentifier(true),
"", null,
surrogate.getDate(), "",
this.crawler.defaultSurrogateProfile.handle(), surrogate.getDate(),
0, this.crawler.defaultSurrogateProfile.handle(),
this.crawler.defaultSurrogateProfile.timezoneOffset()); 0,
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null); this.crawler.defaultSurrogateProfile.timezoneOffset());
final IndexingQueueEntry queueEntry = response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
new IndexingQueueEntry(response, new Document[] {document}, null); final IndexingQueueEntry queueEntry =
new IndexingQueueEntry(response, new Document[] {document}, null);
this.indexingCondensementProcessor.enQueue(queueEntry);
this.indexingCondensementProcessor.enQueue(queueEntry);
} else {
this.index.putDocument(this.index.fulltext().getDefaultConfiguration().toSolrInputDocument(surrogate));
}
if (shallTerminate()) break; if (shallTerminate()) break;
} }
} }