git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6446 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-11-02 19:23:05 +00:00
parent 2fa6bf440b
commit 176e334aa4
5 changed files with 18 additions and 9 deletions

View File

@ -75,7 +75,7 @@
:: ::
<h2>(7) Results from surrogates import</h2> <h2>(7) Results from surrogates import</h2>
<p>These records had been imported from surrogate files in DATA/SURROGATES/in</p> <p>These records had been imported from surrogate files in DATA/SURROGATES/in</p>
<p><em>Use Case:</em> place files with dublin core metadata content into DATA/SURROGATES/in or use an index import method (i.e. wikimedia import, OAI-PMH retrieval)</p> <p><em>Use Case:</em> place files with dublin core metadata content into DATA/SURROGATES/in or use an index import method (i.e. <a href="/IndexImportWikimedia_p.html">wikimedia import</a>, <a href="/IndexImportOAIPMH_p.html">OAI-PMH retrieval</a>)</p>
#(/process)# #(/process)#

View File

@ -9,11 +9,12 @@
#%env/templates/header.template%# #%env/templates/header.template%#
#%env/templates/submenuContentIntegration.template%# #%env/templates/submenuContentIntegration.template%#
<h2>OAI-PMH Import</h2> <h2>OAI-PMH Import</h2>
<p>Results from the import can be monitored in the <a href="/CrawlResults.html?process=7">indexing results for surrogates</a></p>
<form action="IndexImportOAIPMH_p.html" method="get"> <form action="IndexImportOAIPMH_p.html" method="get">
<fieldset> <fieldset>
<legend>Single request import</legend> <legend>Single request import</legend>
This will submit only a single request as given here to an OAI-PMH server and import records into the index This will submit only a single request as given here to an OAI-PMH server and import records into the index<br />
<input name="urlstartone" type="text" value="#[defaulturl]#" size="100" /> <input name="urlstartone" type="text" value="#[defaulturl]#" size="100" />
<input name="submit" type="submit" value="Import OAI-PMH source" /> <input name="submit" type="submit" value="Import OAI-PMH source" />
</fieldset> </fieldset>
@ -36,7 +37,7 @@
<form action="IndexImportOAIPMH_p.html" method="get"> <form action="IndexImportOAIPMH_p.html" method="get">
<fieldset> <fieldset>
<legend>Import all Records from a server</legend> <legend>Import all Records from a server</legend>
Import all records that follow according to resumption elements into index Import all records that follow according to resumption elements into index<br />
<input name="urlstartall" type="text" value="" size="100" /> <input name="urlstartall" type="text" value="" size="100" />
<input name="submit" type="submit" value="Import OAI-PMH source" /> <input name="submit" type="submit" value="Import OAI-PMH source" />
</fieldset> </fieldset>
@ -46,7 +47,7 @@
<dl> <dl>
<dt>Thread:</dt><dd>#[thread]#</dd> <dt>Thread:</dt><dd>#[thread]#</dd>
<dt>Source:</dt><dd>#[source]#</dd> <dt>Source:</dt><dd>#[source]#</dd>
<dt>Processed Chunks:</dt><dd>#[chunkCount]# records</dd> <dt>Processed Chunks:</dt><dd>#[chunkCount]#</dd>
<dt>Imported Records:</dt><dd>#[recordsCount]# records</dd> <dt>Imported Records:</dt><dd>#[recordsCount]# records</dd>
<dt>Speed:</dt><dd>#[speed]# records per second</dd> <dt>Speed:</dt><dd>#[speed]# records per second</dd>
</dl> </dl>

View File

@ -73,6 +73,7 @@ public class IndexImportOAIPMH_p {
if (post != null) { if (post != null) {
if (post.containsKey("urlstartone")) { if (post.containsKey("urlstartone")) {
String oaipmhurl = post.get("urlstartone"); String oaipmhurl = post.get("urlstartone");
if (oaipmhurl.indexOf("?") < 0) oaipmhurl = oaipmhurl + "?verb=ListRecords&metadataPrefix=oai_dc";
DigestURI url = null; DigestURI url = null;
try { try {
url = new DigestURI(oaipmhurl, null); url = new DigestURI(oaipmhurl, null);

View File

@ -59,7 +59,7 @@ public class OAIPMHReader {
response = loader.load(source, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE); response = loader.load(source, false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE);
byte[] b = response.getContent(); byte[] b = response.getContent();
this.resumptionToken = new ResumptionToken(new ByteArrayInputStream(b)); this.resumptionToken = new ResumptionToken(new ByteArrayInputStream(b));
String file = filePrefix + "_" + this.source.getHost() + "_" + DateFormatter.formatShortMilliSecond(new Date()); String file = filePrefix + "." + filename4source(source) + "." + DateFormatter.formatShortMilliSecond(new Date());
File f0 = new File(targetDir, file + ".tmp"); File f0 = new File(targetDir, file + ".tmp");
File f1 = new File(targetDir, file + ".xml"); File f1 = new File(targetDir, file + ".xml");
@ -81,6 +81,15 @@ public class OAIPMHReader {
*/ */
} }
/**
 * Derives a file-name fragment from the source URL of an OAI-PMH request.
 * <p>
 * Starting from {@code ResumptionToken.truncatedURL(source)}, the method
 * removes one trailing {@code "?"}, then one trailing {@code "/"}, then a
 * leading {@code "https://"} and afterwards a leading {@code "http://"}.
 * Finally every {@code '.'}, {@code '/'} and {@code ':'} is replaced by
 * {@code '_'}.
 * <p>
 * NOTE(review): presumably truncatedURL() already cuts off the query part;
 * other characters are passed through unchanged - confirm that the result
 * is always a valid file name on the target file systems.
 *
 * @param source the URL the records are loaded from
 * @return the truncated URL with scheme and trailing separators removed and
 *         '.', '/' and ':' mapped to '_'
 */
public static final String filename4source(DigestURI source) {
    String name = ResumptionToken.truncatedURL(source);

    // cut off at most one trailing "?" and, after that, at most one trailing "/"
    for (final String suffix : new String[] {"?", "/"}) {
        if (name.endsWith(suffix)) {
            name = name.substring(0, name.length() - suffix.length());
        }
    }

    // cut off the protocol prefix; the order (https first, then http) is significant
    for (final String scheme : new String[] {"https://", "http://"}) {
        if (name.startsWith(scheme)) {
            name = name.substring(scheme.length());
        }
    }

    // map the URL separators '.', '/' and ':' to underscores
    final StringBuilder safe = new StringBuilder(name.length());
    for (int i = 0; i < name.length(); i++) {
        final char c = name.charAt(i);
        safe.append((c == '.' || c == '/' || c == ':') ? '_' : c);
    }
    return safe.toString();
}
public ResumptionToken getResumptionToken() { public ResumptionToken getResumptionToken() {
return this.resumptionToken; return this.resumptionToken;
} }

View File

@ -135,11 +135,9 @@ public class ResumptionToken extends TreeMap<String, String> {
if (expiration != null) { if (expiration != null) {
if (expiration.before(new Date())) throw new IOException("the resumption is expired at " + DateFormatter.formatISO8601(expiration) + " (now: " + DateFormatter.formatISO8601(new Date())); if (expiration.before(new Date())) throw new IOException("the resumption is expired at " + DateFormatter.formatISO8601(expiration) + " (now: " + DateFormatter.formatISO8601(new Date()));
// the resumption token is still fresh // the resumption token is still fresh
return new DigestURI(url + "verb=ListRecords&resumptionToken=" + token, null);
} }
// may still be an encoded state return new DigestURI(url + "verb=ListRecords&resumptionToken=" + token, null);
return new DigestURI(url + "verb=ListRecords&" + token, null);
} }
/** /**