mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
URL-export:add query and fix for cast-class-exception
This commit is contained in:
parent
ffe8276063
commit
987f410011
|
@ -193,6 +193,9 @@ function updatepage(str) {
|
|||
<dt class="TableCellDark">URL Filter</dt>
|
||||
<dd><input type="text" name="exportfilter" value=".*.*" size="20" maxlength="250" />
|
||||
</dd>
|
||||
<dt class="TableCellDark">query</dt>
|
||||
<dd><input type="text" name="exportquery" value="*:*" size="20" maxlength="250" />
|
||||
</dd>
|
||||
<dt class="TableCellDark">Export Format</dt>
|
||||
<dd>Only Domain:
|
||||
<input type="radio" name="format" value="dom-text" />Plain Text List (domains only)
|
||||
|
|
|
@ -261,7 +261,8 @@ public class IndexControlURLs_p {
|
|||
final File f = new File(s);
|
||||
f.getParentFile().mkdirs();
|
||||
final String filter = post.get("exportfilter", ".*");
|
||||
final Fulltext.Export running = segment.fulltext().export(f, filter, format, dom);
|
||||
final String query = post.get("exportquery", "*:*");
|
||||
final Fulltext.Export running = segment.fulltext().export(f, filter, query, format, dom);
|
||||
|
||||
prop.put("lurlexport_exportfile", s);
|
||||
prop.put("lurlexport_urlcount", running.count());
|
||||
|
|
|
@ -752,12 +752,12 @@ public final class Fulltext {
|
|||
}
|
||||
|
||||
// export methods
|
||||
public Export export(final File f, final String filter, final int format, final boolean dom) {
|
||||
public Export export(final File f, final String filter, final String query, final int format, final boolean dom) {
|
||||
if ((this.exportthread != null) && (this.exportthread.isAlive())) {
|
||||
ConcurrentLog.warn("LURL-EXPORT", "cannot start another export thread, already one running");
|
||||
return this.exportthread;
|
||||
}
|
||||
this.exportthread = new Export(f, filter, format, dom);
|
||||
this.exportthread = new Export(f, filter, query, format, dom);
|
||||
this.exportthread.start();
|
||||
return this.exportthread;
|
||||
}
|
||||
|
@ -770,14 +770,15 @@ public final class Fulltext {
|
|||
private final File f;
|
||||
private final Pattern pattern;
|
||||
private int count;
|
||||
private String failure;
|
||||
private String failure, query;
|
||||
private final int format;
|
||||
private final boolean dom;
|
||||
|
||||
private Export(final File f, final String filter, final int format, boolean dom) {
|
||||
private Export(final File f, final String filter, final String query, final int format, boolean dom) {
|
||||
// format: 0=text, 1=html, 2=rss/xml
|
||||
this.f = f;
|
||||
this.pattern = filter == null ? null : Pattern.compile(filter);
|
||||
this.query = query == null? "*:*" : query;
|
||||
this.count = 0;
|
||||
this.failure = null;
|
||||
this.format = format;
|
||||
|
@ -806,7 +807,7 @@ public final class Fulltext {
|
|||
|
||||
|
||||
if (this.dom) {
|
||||
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
|
||||
Map<String, ReversibleScoreMap<String>> scores = Fulltext.this.getDefaultConnector().getFacets(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 100000000, CollectionSchema.host_s.getSolrFieldName());
|
||||
ReversibleScoreMap<String> stats = scores.get(CollectionSchema.host_s.getSolrFieldName());
|
||||
for (final String host: stats) {
|
||||
if (this.pattern != null && !this.pattern.matcher(host).matches()) continue;
|
||||
|
@ -815,21 +816,19 @@ public final class Fulltext {
|
|||
this.count++;
|
||||
}
|
||||
} else {
|
||||
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
|
||||
BlockingQueue<SolrDocument> docs = Fulltext.this.getDefaultConnector().concurrentDocumentsByQuery(this.query + " AND " + CollectionSchema.httpstatus_i.getSolrFieldName() + ":200", 0, 100000000, 10 * 60 * 60 * 1000, 100,
|
||||
CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName(), CollectionSchema.title.getSolrFieldName(),
|
||||
CollectionSchema.author.getSolrFieldName(), CollectionSchema.description_txt.getSolrFieldName(), CollectionSchema.size_i.getSolrFieldName(), CollectionSchema.last_modified.getSolrFieldName());
|
||||
SolrDocument doc;
|
||||
ArrayList<?> title;
|
||||
String url, author, hash;
|
||||
String[] descriptions;
|
||||
String url, hash, title, author, description;
|
||||
Integer size;
|
||||
Date date;
|
||||
while ((doc = docs.take()) != AbstractSolrConnector.POISON_DOCUMENT) {
|
||||
hash = (String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName());
|
||||
url = (String) doc.getFieldValue(CollectionSchema.sku.getSolrFieldName());
|
||||
title = (ArrayList<?>) doc.getFieldValue(CollectionSchema.title.getSolrFieldName());
|
||||
author = (String) doc.getFieldValue(CollectionSchema.author.getSolrFieldName());
|
||||
descriptions = (String[]) doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName());
|
||||
hash = getStringFrom(doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
|
||||
url = getStringFrom(doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
|
||||
title = getStringFrom(doc.getFieldValue(CollectionSchema.title.getSolrFieldName()));
|
||||
author = getStringFrom(doc.getFieldValue(CollectionSchema.author.getSolrFieldName()));
|
||||
description = getStringFrom(doc.getFieldValue(CollectionSchema.description_txt.getSolrFieldName()));
|
||||
size = (Integer) doc.getFieldValue(CollectionSchema.size_i.getSolrFieldName());
|
||||
date = (Date) doc.getFieldValue(CollectionSchema.last_modified.getSolrFieldName());
|
||||
if (this.pattern != null && !this.pattern.matcher(url).matches()) continue;
|
||||
|
@ -837,16 +836,14 @@ public final class Fulltext {
|
|||
pw.println(url);
|
||||
}
|
||||
if (this.format == 1) {
|
||||
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</a>");
|
||||
if (title != null) pw.println("<a href=\"" + MultiProtocolURL.escape(url) + "\">" + CharacterCoding.unicode2xml(title, true) + "</a>");
|
||||
}
|
||||
if (this.format == 2) {
|
||||
pw.println("<item>");
|
||||
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml((String) title.iterator().next(), true) + "</title>");
|
||||
if (title != null) pw.println("<title>" + CharacterCoding.unicode2xml(title, true) + "</title>");
|
||||
pw.println("<link>" + MultiProtocolURL.escape(url) + "</link>");
|
||||
if (author != null && !author.isEmpty()) pw.println("<author>" + CharacterCoding.unicode2xml(author, true) + "</author>");
|
||||
if (descriptions != null && descriptions.length > 0) {
|
||||
for (String d: descriptions) pw.println("<description>" + CharacterCoding.unicode2xml(d, true) + "</description>");
|
||||
}
|
||||
if (description != null && !description.isEmpty()) pw.println("<description>" + CharacterCoding.unicode2xml(description, true) + "</description>");
|
||||
if (date != null) pw.println("<pubDate>" + HeaderFramework.formatRFC1123(date) + "</pubDate>");
|
||||
if (size != null) pw.println("<yacy:size>" + size.intValue() + "</yacy:size>");
|
||||
pw.println("<guid isPermaLink=\"false\">" + hash + "</guid>");
|
||||
|
@ -884,6 +881,13 @@ public final class Fulltext {
|
|||
public int count() {
|
||||
return this.count;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private String getStringFrom(final Object o) {
|
||||
if (o == null) return "";
|
||||
if (o instanceof ArrayList) return ((ArrayList<String>) o).get(0);
|
||||
return (String) o;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user