stub for jsonlist index importer web page

This commit is contained in:
Michael Peter Christen 2022-10-23 12:22:31 +02:00
parent efa0425f00
commit 62d177bf59
4 changed files with 174 additions and 7 deletions

View File

@ -0,0 +1,56 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': JsonList Import</title>
#%env/templates/metas.template%#
#(import)#::<meta http-equiv="REFRESH" content="10;url=IndexImportJsonList_p.html" />
<!-- the url= removes http get parameters on refresh, preventing restart of import -->
#(/import)#
</head>
<body id="IndexImportJsonList">
#%env/templates/header.template%#
#%env/templates/submenuIndexImport.template%#
<h2>JSON List Index Dump File Import</h2>
#(import)#
<p>No import thread is running, you can start a new thread here</p>
<form action="IndexImportJsonList_p.html" method="get" accept-charset="UTF-8">
<!-- no post method here, we don't want to transmit the whole file, only the path-->
<fieldset>
<legend>JsonList File Selection: select a jsonlist file (which may be gz compressed)</legend>
<p>
You can download jsonlist archives from the <a href="https://searchlab.eu" target="_blank">YaCy Searchlab</a> portal.
</p>
<dl>
<dt class="TableCellDark"><label for="file">File:</label></dt>
<dd><input name="file" id="file" type="file" value="" size="75" /></dd>
<dt></dt>
<dd>or</dd>
<dt class="TableCellDark"><label for="url">Url:</label></dt>
<dd><input name="url" id="url" value="" size="75"/></dd>
<dt></dt>
<dd><input name="submit" class="btn btn-primary" type="submit" value="Import JsonList File" /></dd>
</dl>
</fieldset>
</form>
<br />
::
<form>
<fieldset><legend>Import Process</legend>
<dl>
<dt>Thread:</dt><dd>#[thread]#</dd>
<dt>JsonList File:</dt><dd>#[jsonlistfile]#</dd>
<dt>Processed:</dt><dd>#[count]# Entries</dd>
<dt>Speed:</dt><dd>#[speed]# pages per second</dd>
<dt>Running Time:</dt><dd>#[runningHours]# hours, #[runningMinutes]# minutes</dd>
<dt>Remaining Time:</dt><dd>#[remainingHours]# hours, #[remainingMinutes]# minutes</dd>
</dl>
</fieldset>
<input name="abort" type="submit" class="btn btn-danger" value="Stop"/>
</form>
#(/import)#
#%env/templates/footer.template%#
</body>
</html>

View File

@ -67,17 +67,22 @@ public class JsonListImporter extends Thread implements Importer {
private InputStream source;
private final String name;
private final File inputFile;
private final long sourceSize;
private long lineCount, startTime, consumed;
private boolean abort = false;
private boolean abort;
private final boolean deletewhendone;
public JsonListImporter(final File f) throws IOException {
super("JsonListImporter - from file " + f.getName());
public JsonListImporter(final File inputFile, final boolean deletewhendone) throws IOException {
super("JsonListImporter - from file " + inputFile.getName());
this.lineCount = 0;
this.consumed = 0;
this.name = f.getName();
this.sourceSize = f.length();
this.source = new FileInputStream(f);
this.inputFile = inputFile;
this.name = inputFile.getName();
this.sourceSize = inputFile.length();
this.abort = false;
this.deletewhendone = deletewhendone;
this.source = new FileInputStream(inputFile);
if (this.name.endsWith(".gz")) this.source = new GZIPInputStream(this.source);
}
@ -255,6 +260,8 @@ public class JsonListImporter extends Thread implements Importer {
try {indexer[t].join(10000);} catch (final InterruptedException e) {}
}
if (this.deletewhendone) this.inputFile.delete();
log.info("finished processing json surrogate: " + ((System.currentTimeMillis() - this.startTime) / 1000) + " seconds");
}

View File

@ -0,0 +1,104 @@
/**
* IndexImportJsonList_p
* Copyright 23.10.2022 by Michael Peter Christen, @orbiterlab
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.htroot;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.channels.Channels;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.importer.JsonListImporter;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
/**
 * Servlet behind the IndexImportJsonList_p.html page.
 *
 * While an import job is alive the page shows its progress (and accepts an
 * "abort" request); otherwise a new import can be started either from a local
 * file path ("file" parameter) or from a remote location ("url" parameter),
 * which is downloaded into a temporary file first.
 */
public class IndexImportJsonList_p {

    /**
     * Builds the template properties for the import page.
     *
     * @param header the request header (not used by this servlet)
     * @param post   the request parameters, may be {@code null}
     * @param env    the server environment (not used by this servlet)
     * @return the properties consumed by the page template; "import" is 1 when
     *         the process view shall be shown and 0 when the selection form
     *         shall be shown
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();

        if (JsonListImporter.job != null && JsonListImporter.job.isAlive()) {
            // one import is running, no option to insert anything
            prop.put("import", 1);
            prop.put("import_thread", "running");
            prop.put("import_jsonlistfile", JsonListImporter.job.source());
            prop.put("import_count", JsonListImporter.job.count());
            prop.put("import_speed", JsonListImporter.job.speed());
            prop.put("import_runningHours", (JsonListImporter.job.runningTime() / 60) / 60);
            prop.put("import_runningMinutes", (JsonListImporter.job.runningTime() / 60) % 60);
            prop.put("import_remainingHours", (JsonListImporter.job.remainingTime() / 60) / 60);
            prop.put("import_remainingMinutes", (JsonListImporter.job.remainingTime() / 60) % 60);
            if (post != null && post.containsKey("abort")) {
                JsonListImporter.job.quit();
            }
            return prop;
        }

        // no import is running: show the form unless a start request succeeds below
        prop.put("import", 0);
        if (post == null || !(post.containsKey("file") || post.containsKey("url"))) {
            return prop;
        }

        // a local file path takes precedence over a url
        final String filename = post.get("file");
        if (filename != null && filename.length() > 0) {
            startFileImport(prop, filename);
        } else {
            final String urlstr = post.get("url");
            if (urlstr != null && urlstr.length() > 0) {
                startUrlImport(prop, urlstr);
            }
        }

        // a freshly requested import has no progress to report yet
        prop.put("import_count", 0);
        prop.put("import_speed", 0);
        prop.put("import_runningHours", 0);
        prop.put("import_runningMinutes", 0);
        prop.put("import_remainingHours", 0);
        prop.put("import_remainingMinutes", 0);
        return prop;
    }

    /**
     * Starts an import thread reading from a file on the local file system.
     * The source file is kept after the import.
     *
     * @param prop     the template properties to report the outcome into
     * @param filename the non-empty path of the file to import
     */
    private static void startFileImport(final serverObjects prop, final String filename) {
        final File sourcefile = new File(filename);
        if (!sourcefile.exists()) {
            prop.put("import_jsonlistfile", "");
            prop.put("import_thread", "Error: file not found [" + filename + "]");
            return;
        }
        try {
            final JsonListImporter wi = new JsonListImporter(sourcefile, false);
            wi.start();
            prop.put("import_thread", "started");
        } catch (final IOException ex) {
            prop.put("import_thread", "Error: file not found [" + filename + "]");
        }
        prop.put("import", 1);
        prop.put("import_jsonlistfile", filename);
    }

    /**
     * Downloads the given url into a temporary file and starts an import
     * thread on it. The importer is asked to delete the temporary file when it
     * is done; if the import cannot be started the temporary file is removed
     * here.
     *
     * @param prop   the template properties to report the outcome into
     * @param urlstr the non-empty url of the jsonlist dump to import
     */
    private static void startUrlImport(final serverObjects prop, final String urlstr) {
        File tempfile = null;
        boolean started = false;
        try {
            final URL url = new URL(urlstr);
            // the importer decides on gzip decompression by the ".gz" ending of
            // the file name, so the ending of the url path must be carried over
            final String path = url.getPath();
            final String suffix = path != null && path.endsWith(".gz") ? ".gz" : "";
            tempfile = File.createTempFile("jsonlistimporter", suffix);
            // close both streams even if the transfer fails
            try (final InputStream in = url.openStream();
                 final FileOutputStream fos = new FileOutputStream(tempfile)) {
                fos.getChannel().transferFrom(Channels.newChannel(in), 0, Long.MAX_VALUE);
            }
            final JsonListImporter wi = new JsonListImporter(tempfile, true);
            wi.start();
            started = true;
            prop.put("import_thread", "started");
        } catch (final IOException ex) {
            // some IOExceptions carry no message; fall back to the exception name
            final String msg = ex.getMessage();
            prop.put("import_thread", "Error: " + (msg == null ? ex.toString() : msg));
        } finally {
            // nobody will clean up the download if the importer never started
            if (!started && tempfile != null) {
                tempfile.delete();
            }
        }
        prop.put("import", 1);
        prop.put("import_jsonlistfile", urlstr);
    }
}

View File

@ -2206,7 +2206,7 @@ public final class Switchboard extends serverSwitch {
// see https://github.com/yacy/yacy_grid_parser/blob/master/README.md
this.log.info("processing json surrogate " + infile);
try {
final JsonListImporter importer = new JsonListImporter(infile);
final JsonListImporter importer = new JsonListImporter(infile, false);
importer.run();
} catch (final IOException e) {
this.log.warn(e);