mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
0N - added an option to generate index export files covering a specific number
of minutes in the past, and reverted the latest change. The export file name will now contain four data elements: f - write date of the first index entry, l - write date of the last index entry, n - date of the index dump itself, c - count of documents inside the dump. '0N' denotes a series of changes that will make it possible to exchange index data dumps in the way needed to integrate ZeroNet index data. This will be based on index dump sharing, which motivates this commit.
This commit is contained in:
parent
5b9030180c
commit
a6bf0b1649
|
@ -18,8 +18,8 @@
|
|||
<form action="IndexExport_p.html" method="post" enctype="multipart/form-data" accept-charset="UTF-8">
|
||||
<fieldset><legend>Loaded URL Export</legend>
|
||||
<dl>
|
||||
<dt class="TableCellDark">Export File</dt>
|
||||
<dd><input type="text" name="exportfile" value="#[exportfile]#" size="120" maxlength="250" />
|
||||
<dt class="TableCellDark">Export Path</dt>
|
||||
<dd><input type="text" name="exportfilepath" value="#[exportfilepath]#" size="120" maxlength="250" />
|
||||
</dd>
|
||||
<dt class="TableCellDark">URL Filter</dt>
|
||||
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
|
||||
|
@ -27,6 +27,9 @@
|
|||
<dt class="TableCellDark">query</dt>
|
||||
<dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
|
||||
</dd>
|
||||
<dt class="TableCellDark">maximum age (seconds, -1 = unlimited)</dt>
|
||||
<dd><input type="text" name="exportmaxseconds" value="-1" size="20" maxlength="250" />
|
||||
</dd>
|
||||
<dt class="TableCellDark">Export Format</dt>
|
||||
<dd>
|
||||
<dl>
|
||||
|
|
|
@ -22,9 +22,9 @@
|
|||
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.date.GenericFormatter;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.index.Fulltext;
|
||||
|
@ -53,9 +53,10 @@ public class IndexExport_p {
|
|||
List<File> dumpFiles = segment.fulltext().dumpFiles();
|
||||
prop.put("dumprestore_dumpfile", dumpFiles.size() == 0 ? "" : dumpFiles.get(dumpFiles.size() - 1).getAbsolutePath());
|
||||
prop.put("dumprestore_optimizemax", 10);
|
||||
prop.putNum("ucount", ucount);
|
||||
|
||||
// show export messages
|
||||
final Fulltext.Export export = segment.fulltext().export();
|
||||
Fulltext.Export export = segment.fulltext().export();
|
||||
if ((export != null) && (export.isAlive())) {
|
||||
// there is currently a running export
|
||||
prop.put("lurlexport", 2);
|
||||
|
@ -66,7 +67,7 @@ public class IndexExport_p {
|
|||
prop.put("reload", 1);
|
||||
} else {
|
||||
prop.put("lurlexport", 1);
|
||||
prop.put("lurlexport_exportfile", sb.getDataPath() + "/DATA/EXPORT/yacy_export_" + sb.peers.myID() + "_" + GenericFormatter.SHORT_SECOND_FORMATTER.format());
|
||||
prop.put("lurlexport_exportfilepath", sb.getDataPath() + "/DATA/EXPORT/");
|
||||
if (export == null) {
|
||||
// there has never been an export
|
||||
prop.put("lurlexportfinished", 0);
|
||||
|
@ -87,7 +88,6 @@ public class IndexExport_p {
|
|||
}
|
||||
|
||||
if (post == null || env == null) {
|
||||
prop.putNum("ucount", ucount);
|
||||
return prop; // nothing to do
|
||||
}
|
||||
|
||||
|
@ -102,23 +102,25 @@ public class IndexExport_p {
|
|||
if (fname.endsWith("rss")) format = Fulltext.ExportFormat.rss;
|
||||
if (fname.endsWith("solr")) format = Fulltext.ExportFormat.solr;
|
||||
|
||||
// extend export file name
|
||||
String s = post.get("exportfile", "");
|
||||
if (s.indexOf('.',0) < 0) {
|
||||
if (format == Fulltext.ExportFormat.text) s = s + ".txt";
|
||||
if (format == Fulltext.ExportFormat.html) s = s + ".html";
|
||||
if (format == Fulltext.ExportFormat.rss ) s = s + "_rss.xml";
|
||||
if (format == Fulltext.ExportFormat.solr) s = s + "_full.xml";
|
||||
}
|
||||
final File f = new File(s);
|
||||
f.getParentFile().mkdirs();
|
||||
final String filter = post.get("exportfilter", ".*");
|
||||
final String query = post.get("exportquery", "*:*");
|
||||
final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom, text);
|
||||
|
||||
prop.put("lurlexport_exportfile", s);
|
||||
prop.put("lurlexport_urlcount", running.count());
|
||||
if ((running != null) && (running.failed() == null)) {
|
||||
final int maxseconds = post.getInt("exportmaxseconds", -1);
|
||||
final String path = post.get("exportfilepath", "");
|
||||
|
||||
// start the export
|
||||
try {
|
||||
export = sb.index.fulltext().export(format, filter, query, maxseconds, new File(path), dom, text);
|
||||
} catch (IOException e) {
|
||||
prop.put("lurlexporterror", 1);
|
||||
prop.put("lurlexporterror_exportfile", "-no export-");
|
||||
prop.put("lurlexporterror_exportfailmsg", e.getMessage());
|
||||
return prop;
|
||||
}
|
||||
|
||||
// show result
|
||||
prop.put("lurlexport_exportfile", export.file().toString());
|
||||
prop.put("lurlexport_urlcount", export.count());
|
||||
if ((export != null) && (export.failed() == null)) {
|
||||
prop.put("lurlexport", 2);
|
||||
}
|
||||
prop.put("reload", 1);
|
||||
|
@ -144,4 +146,4 @@ public class IndexExport_p {
|
|||
return prop;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -77,9 +77,11 @@ import net.yacy.search.schema.WebgraphConfiguration;
|
|||
import net.yacy.search.schema.WebgraphSchema;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.util.DateFormatUtil;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
public final class Fulltext {
|
||||
|
@ -617,9 +619,61 @@ public final class Fulltext {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static enum ExportFormat {
|
||||
text, html, rss, solr;
|
||||
text("txt"), html("html"), rss("rss"), solr("xml");
|
||||
private final String ext;
|
||||
private ExportFormat(String ext) {this.ext = ext;}
|
||||
public String getExt() {return this.ext;}
|
||||
}
|
||||
|
||||
public Export export(Fulltext.ExportFormat format, String filter, String query, final int maxseconds, File path, boolean dom, boolean text) throws IOException {
|
||||
|
||||
// modify query according to maxseconds
|
||||
long now = System.currentTimeMillis();
|
||||
if (maxseconds > 0) {
|
||||
long from = now - maxseconds * 1000L;
|
||||
String nowstr = DateFormatUtil.formatExternal(new Date(now));
|
||||
String fromstr = DateFormatUtil.formatExternal(new Date(from));
|
||||
String dateq = CollectionSchema.load_date_dt.getSolrFieldName() + ":[" + fromstr + " TO " + nowstr + "]";
|
||||
query = query == null || AbstractSolrConnector.CATCHALL_QUERY.equals(query) ? dateq : query + " AND " + dateq;
|
||||
} else {
|
||||
query = query == null? AbstractSolrConnector.CATCHALL_QUERY : query;
|
||||
}
|
||||
|
||||
// check the oldest and latest entry in the index for this query
|
||||
SolrDocumentList firstdoclist, lastdoclist;
|
||||
firstdoclist = this.getDefaultConnector().getDocumentListByQuery(
|
||||
query, CollectionSchema.load_date_dt.getSolrFieldName() + " asc", 0, 1,CollectionSchema.load_date_dt.getSolrFieldName());
|
||||
lastdoclist = this.getDefaultConnector().getDocumentListByQuery(
|
||||
query, CollectionSchema.load_date_dt.getSolrFieldName() + " desc", 0, 1,CollectionSchema.load_date_dt.getSolrFieldName());
|
||||
|
||||
if (firstdoclist.size() == 0 || lastdoclist.size() == 0) {
|
||||
assert firstdoclist.size() == 0 && lastdoclist.size() == 0;
|
||||
throw new IOException("number of exported documents == 0");
|
||||
}
|
||||
assert firstdoclist.size() == 1 && lastdoclist.size() == 1;
|
||||
long doccount = firstdoclist.getNumFound();
|
||||
|
||||
// create the export name
|
||||
SolrDocument firstdoc = firstdoclist.get(0);
|
||||
SolrDocument lastdoc = lastdoclist.get(0);
|
||||
Object firstdateobject = firstdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
|
||||
Object lastdateobject = lastdoc.getFieldValue(CollectionSchema.load_date_dt.getSolrFieldName());
|
||||
Date firstdate = (Date) firstdateobject;
|
||||
Date lastdate = (Date) lastdateobject;
|
||||
String s = new File(path, "yacy_dump_" +
|
||||
"f" + GenericFormatter.FORMAT_SHORT_MINUTE.format(firstdate) + "_" +
|
||||
"l" + GenericFormatter.FORMAT_SHORT_MINUTE.format(lastdate) + "_" +
|
||||
"n" + GenericFormatter.FORMAT_SHORT_MINUTE.format(new Date(now)) + "_" +
|
||||
"c" + String.format("%1$012d", doccount)).getAbsolutePath();
|
||||
|
||||
// create export file name
|
||||
if (s.indexOf('.',0) < 0) s += "." + format.getExt();
|
||||
final File f = new File(s);
|
||||
f.getParentFile().mkdirs();
|
||||
|
||||
return export(f, filter, query, format, dom, text);
|
||||
}
|
||||
|
||||
// export methods
|
||||
|
|
Loading…
Reference in New Issue
Block a user