mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added an option to re-index XML snapshot dumps that were exported to
HTCACHE/snapshots by simply placing them in the SURROGATES/in path.
This commit is contained in:
parent
6f4fe4b175
commit
ff29b0e503
|
@ -31,6 +31,8 @@ import java.util.Set;
|
|||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import net.yacy.cora.storage.Configuration;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
|
@ -87,6 +89,16 @@ public class SchemaConfiguration extends Configuration implements Serializable {
|
|||
return sid;
|
||||
}
|
||||
|
||||
public SolrInputDocument toSolrInputDocument(final MultiMapSolrParams params) {
|
||||
SolrInputDocument sid = new SolrInputDocument();
|
||||
for (String name: params.getMap().keySet()) {
|
||||
if (this.contains(name)) { // check each field if enabled in local Solr schema
|
||||
sid.addField(name, params.getParams(name));
|
||||
}
|
||||
}
|
||||
return sid;
|
||||
}
|
||||
|
||||
public SolrDocument toSolrDocument(final SolrInputDocument doc, Set<String> omitFields) {
|
||||
SolrDocument sd = new SolrDocument();
|
||||
for (SolrInputField field: doc) {
|
||||
|
|
|
@ -127,6 +127,7 @@ public class DCEntry extends MultiMapSolrParams {
|
|||
// <dc:identifier>http://hdl.handle.net/2104/8302</dc:identifier>
|
||||
// <dc:identifier>10.1051/0004-6361/201117940</dc:identifier>
|
||||
String u = this.get("url");
|
||||
if (u == null) u = this.get("sku");
|
||||
|
||||
if (u == null) {
|
||||
final String[] urls = this.getParams("dc:identifier");
|
||||
|
@ -342,7 +343,7 @@ public class DCEntry extends MultiMapSolrParams {
|
|||
null,
|
||||
getDescriptions(),
|
||||
getLon(), getLat(),
|
||||
"",
|
||||
get("text_t", ""),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
|
|
|
@ -140,6 +140,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
|
|||
this.surrogate = new DCEntry();
|
||||
} else if ("element".equals(tag) || "str".equals(tag) || "int".equals(tag) || "bool".equals(tag) || "long".equals(tag)) {
|
||||
this.elementName = atts.getValue("name");
|
||||
this.parsingValue = true;
|
||||
} else if ("value".equals(tag)) {
|
||||
this.buffer.setLength(0);
|
||||
this.parsingValue = true;
|
||||
|
@ -174,6 +175,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
|
|||
if (this.elementName != null) {
|
||||
this.surrogate.getMap().put(this.elementName, new String[]{value});
|
||||
}
|
||||
this.buffer.setLength(0);
|
||||
this.parsingValue = false;
|
||||
} else if ("value".equals(tag)) {
|
||||
//System.out.println("BUFFER-SIZE=" + buffer.length());
|
||||
|
|
|
@ -1942,6 +1942,7 @@ public final class Switchboard extends serverSwitch {
|
|||
ConcurrentLog.logException(e);
|
||||
}
|
||||
}
|
||||
log.info("processed surrogate " + infile);
|
||||
}
|
||||
}
|
||||
if (is != null) try {is.close();} catch (IOException e) {}
|
||||
|
@ -1969,23 +1970,27 @@ public final class Switchboard extends serverSwitch {
|
|||
continue;
|
||||
}
|
||||
|
||||
// create a queue entry
|
||||
final Document document = surrogate.document();
|
||||
final Request request =
|
||||
new Request(
|
||||
ASCII.getBytes(this.peers.mySeed().hash),
|
||||
surrogate.getIdentifier(true),
|
||||
null,
|
||||
"",
|
||||
surrogate.getDate(),
|
||||
this.crawler.defaultSurrogateProfile.handle(),
|
||||
0,
|
||||
this.crawler.defaultSurrogateProfile.timezoneOffset());
|
||||
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
|
||||
final IndexingQueueEntry queueEntry =
|
||||
new IndexingQueueEntry(response, new Document[] {document}, null);
|
||||
|
||||
this.indexingCondensementProcessor.enQueue(queueEntry);
|
||||
if (surrogate.get("text_t") == null) {
|
||||
// create a queue entry
|
||||
final Document document = surrogate.document();
|
||||
final Request request =
|
||||
new Request(
|
||||
ASCII.getBytes(this.peers.mySeed().hash),
|
||||
surrogate.getIdentifier(true),
|
||||
null,
|
||||
"",
|
||||
surrogate.getDate(),
|
||||
this.crawler.defaultSurrogateProfile.handle(),
|
||||
0,
|
||||
this.crawler.defaultSurrogateProfile.timezoneOffset());
|
||||
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
|
||||
final IndexingQueueEntry queueEntry =
|
||||
new IndexingQueueEntry(response, new Document[] {document}, null);
|
||||
|
||||
this.indexingCondensementProcessor.enQueue(queueEntry);
|
||||
} else {
|
||||
this.index.putDocument(this.index.fulltext().getDefaultConfiguration().toSolrInputDocument(surrogate));
|
||||
}
|
||||
if (shallTerminate()) break;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user