mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
fixes
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6446 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
2fa6bf440b
commit
176e334aa4
|
@ -75,7 +75,7 @@
|
||||||
::
|
::
|
||||||
<h2>(7) Results from surrogates import</h2>
|
<h2>(7) Results from surrogates import</h2>
|
||||||
<p>These records had been imported from surrogate files in DATA/SURROGATES/in</p>
|
<p>These records had been imported from surrogate files in DATA/SURROGATES/in</p>
|
||||||
<p><em>Use Case:</em> place files with Dublin Core metadata content into DATA/SURROGATES/in or use an index import method (e.g. wikimedia import, OAI-PMH retrieval)</p>
|
<p><em>Use Case:</em> place files with Dublin Core metadata content into DATA/SURROGATES/in or use an index import method (e.g. <a href="/IndexImportWikimedia_p.html">wikimedia import</a>, <a href="/IndexImportOAIPMH_p.html">OAI-PMH retrieval</a>)</p>
|
||||||
#(/process)#
|
#(/process)#
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -9,11 +9,12 @@
|
||||||
#%env/templates/header.template%#
|
#%env/templates/header.template%#
|
||||||
#%env/templates/submenuContentIntegration.template%#
|
#%env/templates/submenuContentIntegration.template%#
|
||||||
<h2>OAI-PMH Import</h2>
|
<h2>OAI-PMH Import</h2>
|
||||||
|
<p>Results from the import can be monitored in the <a href="/CrawlResults.html?process=7">indexing results for surrogates</a></p>
|
||||||
|
|
||||||
<form action="IndexImportOAIPMH_p.html" method="get">
|
<form action="IndexImportOAIPMH_p.html" method="get">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Single request import</legend>
|
<legend>Single request import</legend>
|
||||||
This will submit only a single request as given here to an OAI-PMH server and import the records into the index
|
This will submit only a single request as given here to an OAI-PMH server and import the records into the index<br />
|
||||||
<input name="urlstartone" type="text" value="#[defaulturl]#" size="100" />
|
<input name="urlstartone" type="text" value="#[defaulturl]#" size="100" />
|
||||||
<input name="submit" type="submit" value="Import OAI-PMH source" />
|
<input name="submit" type="submit" value="Import OAI-PMH source" />
|
||||||
</fieldset>
|
</fieldset>
|
||||||
|
@ -36,7 +37,7 @@
|
||||||
<form action="IndexImportOAIPMH_p.html" method="get">
|
<form action="IndexImportOAIPMH_p.html" method="get">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Import all Records from a server</legend>
|
<legend>Import all Records from a server</legend>
|
||||||
Import all records that follow according to resumption elements into index
|
Import all records that follow according to resumption elements into index<br />
|
||||||
<input name="urlstartall" type="text" value="" size="100" />
|
<input name="urlstartall" type="text" value="" size="100" />
|
||||||
<input name="submit" type="submit" value="Import OAI-PMH source" />
|
<input name="submit" type="submit" value="Import OAI-PMH source" />
|
||||||
</fieldset>
|
</fieldset>
|
||||||
|
@ -46,7 +47,7 @@
|
||||||
<dl>
|
<dl>
|
||||||
<dt>Thread:</dt><dd>#[thread]#</dd>
|
<dt>Thread:</dt><dd>#[thread]#</dd>
|
||||||
<dt>Source:</dt><dd>#[source]#</dd>
|
<dt>Source:</dt><dd>#[source]#</dd>
|
||||||
<dt>Processed Chunks:</dt><dd>#[chunkCount]# records</dd>
|
<dt>Processed Chunks:</dt><dd>#[chunkCount]#</dd>
|
||||||
<dt>Imported Records:</dt><dd>#[recordsCount]# records</dd>
|
<dt>Imported Records:</dt><dd>#[recordsCount]# records</dd>
|
||||||
<dt>Speed:</dt><dd>#[speed]# records per second</dd>
|
<dt>Speed:</dt><dd>#[speed]# records per second</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
|
|
@ -73,6 +73,7 @@ public class IndexImportOAIPMH_p {
|
||||||
if (post != null) {
|
if (post != null) {
|
||||||
if (post.containsKey("urlstartone")) {
|
if (post.containsKey("urlstartone")) {
|
||||||
String oaipmhurl = post.get("urlstartone");
|
String oaipmhurl = post.get("urlstartone");
|
||||||
|
if (oaipmhurl.indexOf("?") < 0) oaipmhurl = oaipmhurl + "?verb=ListRecords&metadataPrefix=oai_dc";
|
||||||
DigestURI url = null;
|
DigestURI url = null;
|
||||||
try {
|
try {
|
||||||
url = new DigestURI(oaipmhurl, null);
|
url = new DigestURI(oaipmhurl, null);
|
||||||
|
|
|
@ -59,7 +59,7 @@ public class OAIPMHReader {
|
||||||
response = loader.load(source, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE);
|
response = loader.load(source, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE);
|
||||||
byte[] b = response.getContent();
|
byte[] b = response.getContent();
|
||||||
this.resumptionToken = new ResumptionToken(new ByteArrayInputStream(b));
|
this.resumptionToken = new ResumptionToken(new ByteArrayInputStream(b));
|
||||||
String file = filePrefix + "_" + this.source.getHost() + "_" + DateFormatter.formatShortMilliSecond(new Date());
|
String file = filePrefix + "." + filename4source(source) + "." + DateFormatter.formatShortMilliSecond(new Date());
|
||||||
File f0 = new File(targetDir, file + ".tmp");
|
File f0 = new File(targetDir, file + ".tmp");
|
||||||
File f1 = new File(targetDir, file + ".xml");
|
File f1 = new File(targetDir, file + ".xml");
|
||||||
|
|
||||||
|
@ -81,6 +81,15 @@ public class OAIPMHReader {
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String filename4source(DigestURI source) {
|
||||||
|
String s = ResumptionToken.truncatedURL(source);
|
||||||
|
if (s.endsWith("?")) s = s.substring(0, s.length() - 1);
|
||||||
|
if (s.endsWith("/")) s = s.substring(0, s.length() - 1);
|
||||||
|
if (s.startsWith("https://")) s = s.substring(8);
|
||||||
|
if (s.startsWith("http://")) s = s.substring(7);
|
||||||
|
return s.replace('.', '_').replace('/', '_').replace(':', '_');
|
||||||
|
}
|
||||||
|
|
||||||
public ResumptionToken getResumptionToken() {
|
public ResumptionToken getResumptionToken() {
|
||||||
return this.resumptionToken;
|
return this.resumptionToken;
|
||||||
}
|
}
|
||||||
|
|
|
@ -135,11 +135,9 @@ public class ResumptionToken extends TreeMap<String, String> {
|
||||||
if (expiration != null) {
|
if (expiration != null) {
|
||||||
if (expiration.before(new Date())) throw new IOException("the resumption is expired at " + DateFormatter.formatISO8601(expiration) + " (now: " + DateFormatter.formatISO8601(new Date()));
|
if (expiration.before(new Date())) throw new IOException("the resumption is expired at " + DateFormatter.formatISO8601(expiration) + " (now: " + DateFormatter.formatISO8601(new Date()));
|
||||||
// the resumption token is still fresh
|
// the resumption token is still fresh
|
||||||
return new DigestURI(url + "verb=ListRecords&resumptionToken=" + token, null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// may still be an encoded state
|
return new DigestURI(url + "verb=ListRecords&resumptionToken=" + token, null);
|
||||||
return new DigestURI(url + "verb=ListRecords&" + token, null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue
Block a user