URL-export:add query and fix for cast-class-exception

This commit is contained in:
sixcooler 2013-11-06 19:22:26 +01:00
parent ffe8276063
commit 987f410011
3 changed files with 28 additions and 20 deletions

View File

@ -193,6 +193,9 @@ function updatepage(str) {
<dt class="TableCellDark">URL Filter</dt>
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">query</dt>
<dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
</dd>
<dt class="TableCellDark">Export Format</dt>
<dd>Only Domain:
<input type="radio" name="format" value="dom-text" />Plain Text List (domains only)&nbsp;&nbsp;

View File

@ -261,7 +261,8 @@ public class IndexControlURLs_p {
final File f = new File(s);
f.getParentFile().mkdirs();
final String filter = post.get("exportfilter", ".*");
final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
final String query = post.get("exportquery", "*:*");
final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
prop.put("lurlexport_exportfile", s);
prop.put("lurlexport_urlcount", running.count());

View File

@ -752,12 +752,12 @@ public final class Fulltext {
}
// export methods
public Export export(final File f, final String filter, final int format, final boolean dom) {
public Export export(final File f, final String filter, final String query, final int format, final boolean dom) {
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
ConcurrentLog.warn("LURL-EXPORT", "cannot start another export thread, already one running");
return this.exportthread;
}
this.exportthread = new Export(f, filter, format, dom);
this.exportthread = new Export(f, filter, query, format, dom);
this.exportthread.start();
return this.exportthread;
}
@ -770,14 +770,15 @@ public final class Fulltext {
private final File f;
private final Pattern pattern;
private int count;
private String failure;
private String failure, query;
private final int format;
private final boolean dom;
private Export(final File f, final String filter, final int format, boolean dom) {
private Export(final File f, final String filter, final String query, final int format, boolean dom) {
// format: 0=text, 1=html, 2=rss/xml
this.f = f;
this.pattern = filter == null ? null : Pattern.compile(filter);
this.query = query == null? "*:*" : query;
this.count = 0;
this.failure = null;
this.format = format;
@ -806,7 +807,7 @@ public final class Fulltext {
if (this.dom) {
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
ReversibleScoreMap<String> stats = scores.get(CollectionSchema.host_s.getSolrFieldName());
for (final String host: stats) {
if (this.pattern != null && !this.pattern.matcher(host).matches()) continue;
@ -815,21 +816,19 @@ public final class Fulltext {
this.count++;
}
} else {
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(),
CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName());
SolrDocument doc;
ArrayList<?> title;
String url, author, hash;
String[] descriptions;
String url, hash, title, author, description;
Integer size;
Date date;
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
title = (ArrayList<?>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
author = (String) doc.getFieldValue(CollectionSchema.author.getSolrFieldName());
descriptions = (String[]) doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName());
hash = getStringFrom(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
url = getStringFrom(doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
title = getStringFrom(doc.getFieldValue(CollectionSchema.title.getSolrFieldName()));
author = getStringFrom(doc.getFieldValue(CollectionSchema.author.getSolrFieldName()));
description = getStringFrom(doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName()));
size = (Integer) doc.getFieldValue(CollectionSchema.size_i.getSolrFieldName());
date = (Date) doc.getFieldValue(CollectionSchema.last_modified.getSolrFieldName());
if (this.pattern != null && !this.pattern.matcher(url).matches()) continue;
@ -837,16 +836,14 @@ public final class Fulltext {
pw.println(url);
}
if (this.format == 1) {
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</a>");
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml(title, true) + "</a>");
}
if (this.format == 2) {
pw.println("<item>");
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</title>");
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml(title, true) + "</title>");
pw.println("<link>" + MultiProtocolURL.escape(url) + "</link>");
if (author != null && !author.isEmpty()) pw.println("<author>" + CharacterCoding.unicode2xml(author, true) + "</author>");
if (descriptions != null && descriptions.length > 0) {
for (String d: descriptions) pw.println("<description>" + CharacterCoding.unicode2xml(d, true) + "</description>");
}
if (description != null && !description.isEmpty()) pw.println("<description>" + CharacterCoding.unicode2xml(description, true) + "</description>");
if (date != null) pw.println("<pubDate>" + HeaderFramework.formatRFC1123(date) + "</pubDate>");
if (size != null) pw.println("<yacy:size>" + size.intValue() + "</yacy:size>");
pw.println("<guid isPermaLink=\"false\">" + hash + "</guid>");
@ -884,6 +881,13 @@ public final class Fulltext {
public int count() {
return this.count;
}
@SuppressWarnings("unchecked")
private String getStringFrom(final Object o) {
if (o == null) return "";
if (o instanceof ArrayList) return ((ArrayList<String>) o).get(0);
return (String) o;
}
}