mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Added an option to re-index exported XML snapshot dumps to
HTCACHE/snapshots by just placing them in the SURROGATES/in path.
This commit is contained in:
parent
6f4fe4b175
commit
ff29b0e503
|
@ -31,6 +31,8 @@ import java.util.Set;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.common.SolrInputField;
|
import org.apache.solr.common.SolrInputField;
|
||||||
|
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
|
||||||
import net.yacy.cora.storage.Configuration;
|
import net.yacy.cora.storage.Configuration;
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
|
@ -87,6 +89,16 @@ public class SchemaConfiguration extends Configuration implements Serializable {
|
||||||
return sid;
|
return sid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SolrInputDocument toSolrInputDocument(final MultiMapSolrParams params) {
|
||||||
|
SolrInputDocument sid = new SolrInputDocument();
|
||||||
|
for (String name: params.getMap().keySet()) {
|
||||||
|
if (this.contains(name)) { // check each field if enabled in local Solr schema
|
||||||
|
sid.addField(name, params.getParams(name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sid;
|
||||||
|
}
|
||||||
|
|
||||||
public SolrDocument toSolrDocument(final SolrInputDocument doc, Set<String> omitFields) {
|
public SolrDocument toSolrDocument(final SolrInputDocument doc, Set<String> omitFields) {
|
||||||
SolrDocument sd = new SolrDocument();
|
SolrDocument sd = new SolrDocument();
|
||||||
for (SolrInputField field: doc) {
|
for (SolrInputField field: doc) {
|
||||||
|
|
|
@ -127,6 +127,7 @@ public class DCEntry extends MultiMapSolrParams {
|
||||||
// <dc:identifier>http://hdl.handle.net/2104/8302</dc:identifier>
|
// <dc:identifier>http://hdl.handle.net/2104/8302</dc:identifier>
|
||||||
// <dc:identifier>10.1051/0004-6361/201117940</dc:identifier>
|
// <dc:identifier>10.1051/0004-6361/201117940</dc:identifier>
|
||||||
String u = this.get("url");
|
String u = this.get("url");
|
||||||
|
if (u == null) u = this.get("sku");
|
||||||
|
|
||||||
if (u == null) {
|
if (u == null) {
|
||||||
final String[] urls = this.getParams("dc:identifier");
|
final String[] urls = this.getParams("dc:identifier");
|
||||||
|
@ -342,7 +343,7 @@ public class DCEntry extends MultiMapSolrParams {
|
||||||
null,
|
null,
|
||||||
getDescriptions(),
|
getDescriptions(),
|
||||||
getLon(), getLat(),
|
getLon(), getLat(),
|
||||||
"",
|
get("text_t", ""),
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
|
|
|
@ -140,6 +140,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
|
||||||
this.surrogate = new DCEntry();
|
this.surrogate = new DCEntry();
|
||||||
} else if ("element".equals(tag) || "str".equals(tag) || "int".equals(tag) || "bool".equals(tag) || "long".equals(tag)) {
|
} else if ("element".equals(tag) || "str".equals(tag) || "int".equals(tag) || "bool".equals(tag) || "long".equals(tag)) {
|
||||||
this.elementName = atts.getValue("name");
|
this.elementName = atts.getValue("name");
|
||||||
|
this.parsingValue = true;
|
||||||
} else if ("value".equals(tag)) {
|
} else if ("value".equals(tag)) {
|
||||||
this.buffer.setLength(0);
|
this.buffer.setLength(0);
|
||||||
this.parsingValue = true;
|
this.parsingValue = true;
|
||||||
|
@ -174,6 +175,7 @@ public class SurrogateReader extends DefaultHandler implements Runnable {
|
||||||
if (this.elementName != null) {
|
if (this.elementName != null) {
|
||||||
this.surrogate.getMap().put(this.elementName, new String[]{value});
|
this.surrogate.getMap().put(this.elementName, new String[]{value});
|
||||||
}
|
}
|
||||||
|
this.buffer.setLength(0);
|
||||||
this.parsingValue = false;
|
this.parsingValue = false;
|
||||||
} else if ("value".equals(tag)) {
|
} else if ("value".equals(tag)) {
|
||||||
//System.out.println("BUFFER-SIZE=" + buffer.length());
|
//System.out.println("BUFFER-SIZE=" + buffer.length());
|
||||||
|
|
|
@ -1942,6 +1942,7 @@ public final class Switchboard extends serverSwitch {
|
||||||
ConcurrentLog.logException(e);
|
ConcurrentLog.logException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
log.info("processed surrogate " + infile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (is != null) try {is.close();} catch (IOException e) {}
|
if (is != null) try {is.close();} catch (IOException e) {}
|
||||||
|
@ -1969,23 +1970,27 @@ public final class Switchboard extends serverSwitch {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// create a queue entry
|
if (surrogate.get("text_t") == null) {
|
||||||
final Document document = surrogate.document();
|
// create a queue entry
|
||||||
final Request request =
|
final Document document = surrogate.document();
|
||||||
new Request(
|
final Request request =
|
||||||
ASCII.getBytes(this.peers.mySeed().hash),
|
new Request(
|
||||||
surrogate.getIdentifier(true),
|
ASCII.getBytes(this.peers.mySeed().hash),
|
||||||
null,
|
surrogate.getIdentifier(true),
|
||||||
"",
|
null,
|
||||||
surrogate.getDate(),
|
"",
|
||||||
this.crawler.defaultSurrogateProfile.handle(),
|
surrogate.getDate(),
|
||||||
0,
|
this.crawler.defaultSurrogateProfile.handle(),
|
||||||
this.crawler.defaultSurrogateProfile.timezoneOffset());
|
0,
|
||||||
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
|
this.crawler.defaultSurrogateProfile.timezoneOffset());
|
||||||
final IndexingQueueEntry queueEntry =
|
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false, null);
|
||||||
new IndexingQueueEntry(response, new Document[] {document}, null);
|
final IndexingQueueEntry queueEntry =
|
||||||
|
new IndexingQueueEntry(response, new Document[] {document}, null);
|
||||||
this.indexingCondensementProcessor.enQueue(queueEntry);
|
|
||||||
|
this.indexingCondensementProcessor.enQueue(queueEntry);
|
||||||
|
} else {
|
||||||
|
this.index.putDocument(this.index.fulltext().getDefaultConfiguration().toSolrInputDocument(surrogate));
|
||||||
|
}
|
||||||
if (shallTerminate()) break;
|
if (shallTerminate()) break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user