From 62d177bf59045c2d5bd43a8fbd202efb5290f3de Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 23 Oct 2022 12:22:31 +0200 Subject: [PATCH] stub for jsonlist index importer web page --- htroot/IndexImportJsonList_p.html | 56 ++++++++++ .../document/importer/JsonListImporter.java | 19 +++- .../yacy/htroot/IndexImportJsonList_p.java | 104 ++++++++++++++++++ source/net/yacy/search/Switchboard.java | 2 +- 4 files changed, 174 insertions(+), 7 deletions(-) create mode 100644 htroot/IndexImportJsonList_p.html create mode 100644 source/net/yacy/htroot/IndexImportJsonList_p.java diff --git a/htroot/IndexImportJsonList_p.html b/htroot/IndexImportJsonList_p.html new file mode 100644 index 000000000..502b0ad73 --- /dev/null +++ b/htroot/IndexImportJsonList_p.html @@ -0,0 +1,56 @@ + + + + YaCy '#[clientname]#': JsonList Import + #%env/templates/metas.template%# + #(import)#:: + + #(/import)# + + + #%env/templates/header.template%# + #%env/templates/submenuIndexImport.template%# +

JSON List Index Dump File Import

+ + #(import)# +

No import thread is running, you can start a new thread here

+
+ +
+ JsonList File Selection: select a jsonlist file (which may be gz compressed) +

+ You can download jsonlist archives from the YaCy Searchlab portal. +

+
+
+
+
+
or
+
+
+
+
+
+
+
+ +
+ :: +
+
Import Process +
+
Thread:
#[thread]#
+
JsonList File:
#[jsonlistfile]#
+
Processed:
#[count]# Entries
+
Speed:
#[speed]# pages per second
+
Running Time:
#[runningHours]# hours, #[runningMinutes]# minutes
+
Remaining Time:
#[remainingHours]# hours, #[remainingMinutes]# minutes
+
+
+ +
+ #(/import)# + + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/source/net/yacy/document/importer/JsonListImporter.java b/source/net/yacy/document/importer/JsonListImporter.java index 3fc81457e..ac23db760 100644 --- a/source/net/yacy/document/importer/JsonListImporter.java +++ b/source/net/yacy/document/importer/JsonListImporter.java @@ -67,17 +67,22 @@ public class JsonListImporter extends Thread implements Importer { private InputStream source; private final String name; + private final File inputFile; private final long sourceSize; private long lineCount, startTime, consumed; - private boolean abort = false; + private boolean abort; + private final boolean deletewhendone; - public JsonListImporter(final File f) throws IOException { - super("JsonListImporter - from file " + f.getName()); + public JsonListImporter(final File inputFile, final boolean deletewhendone) throws IOException { + super("JsonListImporter - from file " + inputFile.getName()); this.lineCount = 0; this.consumed = 0; - this.name = f.getName(); - this.sourceSize = f.length(); - this.source = new FileInputStream(f); + this.inputFile = inputFile; + this.name = inputFile.getName(); + this.sourceSize = inputFile.length(); + this.abort = false; + this.deletewhendone = deletewhendone; + this.source = new FileInputStream(inputFile); if (this.name.endsWith(".gz")) this.source = new GZIPInputStream(this.source); } @@ -255,6 +260,8 @@ public class JsonListImporter extends Thread implements Importer { try {indexer[t].join(10000);} catch (final InterruptedException e) {} } + if (this.deletewhendone) this.inputFile.delete(); + log.info("finished processing json surrogate: " + ((System.currentTimeMillis() - this.startTime) / 1000) + " seconds"); } diff --git a/source/net/yacy/htroot/IndexImportJsonList_p.java b/source/net/yacy/htroot/IndexImportJsonList_p.java new file mode 100644 index 000000000..785ba0248 --- /dev/null +++ b/source/net/yacy/htroot/IndexImportJsonList_p.java 
@@ -0,0 +1,104 @@ +/** + * IndexImportJsonList_p + * Copyright 23.10.2022 by Michael Peter Christen, @orbiterlab + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +package net.yacy.htroot; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.net.URL; +import java.nio.channels.Channels; + +import net.yacy.cora.protocol.RequestHeader; +import net.yacy.document.importer.JsonListImporter; +import net.yacy.server.serverObjects; +import net.yacy.server.serverSwitch; + +public class IndexImportJsonList_p { + + public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { + final serverObjects prop = new serverObjects(); + + if (JsonListImporter.job != null && JsonListImporter.job.isAlive()) { + // one import is running, no option to insert anything + prop.put("import", 1); + prop.put("import_thread", "running"); + prop.put("import_jsonlistfile", JsonListImporter.job.source()); + prop.put("import_count", JsonListImporter.job.count()); + prop.put("import_speed", JsonListImporter.job.speed()); + prop.put("import_runningHours", (JsonListImporter.job.runningTime() / 60) / 60); + prop.put("import_runningMinutes", (JsonListImporter.job.runningTime() / 60) % 60); + prop.put("import_remainingHours", (JsonListImporter.job.remainingTime() / 60) / 60); + 
prop.put("import_remainingMinutes", (JsonListImporter.job.remainingTime() / 60) % 60); + if (post != null && post.containsKey("abort")) { + JsonListImporter.job.quit(); + } + } else { + prop.put("import", 0); + if (post != null) { + if (post.containsKey("file") || post.containsKey("url")) { + final String filename = post.get("file"); + if (filename != null && filename.length() > 0) { + final File sourcefile = new File(filename); + if (sourcefile.exists()) { + try { + final JsonListImporter wi = new JsonListImporter(sourcefile, false); + wi.start(); + prop.put("import_thread", "started"); + } catch (final IOException ex) { + prop.put("import_thread", "Error: file not found [" + filename + "]"); + } + prop.put("import", 1); + prop.put("import_jsonlistfile", filename); + } else { + prop.put("import_jsonlistfile", ""); + prop.put("import_thread", "Error: file not found [" + filename + "]"); + } + } else { + final String urlstr = post.get("url"); + if (urlstr != null && urlstr.length() > 0) { + try { + final URL url = new URL(urlstr); + final File tempfile = File.createTempFile("jsonlistimporter", ""); + final FileOutputStream fos = new FileOutputStream(tempfile); + fos.getChannel().transferFrom(Channels.newChannel(url.openStream()), 0, Long.MAX_VALUE); + fos.close(); + final JsonListImporter wi = new JsonListImporter(tempfile, true); + wi.start(); + prop.put("import_thread", "started"); + } catch (final IOException ex) { + prop.put("import_thread", ex.getMessage()); + } + prop.put("import", 1); + prop.put("import_jsonlistfile", urlstr); + } + } + + prop.put("import_count", 0); + prop.put("import_speed", 0); + prop.put("import_runningHours", 0); + prop.put("import_runningMinutes", 0); + prop.put("import_remainingHours", 0); + prop.put("import_remainingMinutes", 0); + } + } + } + return prop; + } +} diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 68ff7b426..75a2411e0 100644 --- a/source/net/yacy/search/Switchboard.java 
+++ b/source/net/yacy/search/Switchboard.java @@ -2206,7 +2206,7 @@ public final class Switchboard extends serverSwitch { // see https://github.com/yacy/yacy_grid_parser/blob/master/README.md this.log.info("processing json surrogate " + infile); try { - final JsonListImporter importer = new JsonListImporter(infile); + final JsonListImporter importer = new JsonListImporter(infile, false); importer.run(); } catch (final IOException e) { this.log.warn(e);