mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
refactoring
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7426 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
f5baf53391
commit
54e77e6255
|
@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit;
|
||||||
import net.yacy.cora.document.RSSMessage;
|
import net.yacy.cora.document.RSSMessage;
|
||||||
import net.yacy.cora.protocol.HeaderFramework;
|
import net.yacy.cora.protocol.HeaderFramework;
|
||||||
import net.yacy.cora.protocol.RequestHeader;
|
import net.yacy.cora.protocol.RequestHeader;
|
||||||
import net.yacy.cora.services.Search;
|
import net.yacy.cora.services.SearchSRURSS;
|
||||||
import net.yacy.document.geolocalization.Location;
|
import net.yacy.document.geolocalization.Location;
|
||||||
import de.anomic.data.LibraryProvider;
|
import de.anomic.data.LibraryProvider;
|
||||||
import de.anomic.search.Switchboard;
|
import de.anomic.search.Switchboard;
|
||||||
|
@ -93,7 +93,7 @@ public class yacysearch_location {
|
||||||
// get a queue of search results
|
// get a queue of search results
|
||||||
String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8080") + "/yacysearch.rss";
|
String rssSearchServiceURL = "http://localhost:" + sb.getConfig("port", "8080") + "/yacysearch.rss";
|
||||||
BlockingQueue<RSSMessage> results = new LinkedBlockingQueue<RSSMessage>();
|
BlockingQueue<RSSMessage> results = new LinkedBlockingQueue<RSSMessage>();
|
||||||
Search.searchSRURSS(rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false, results);
|
SearchSRURSS.searchSRURSS(results, rssSearchServiceURL, query, maximumTime, Integer.MAX_VALUE, false, false);
|
||||||
|
|
||||||
// take the results and compute some locations
|
// take the results and compute some locations
|
||||||
RSSMessage message;
|
RSSMessage message;
|
||||||
|
|
|
@ -62,7 +62,7 @@ import net.yacy.cora.document.RSSFeed;
|
||||||
import net.yacy.cora.document.RSSMessage;
|
import net.yacy.cora.document.RSSMessage;
|
||||||
import net.yacy.cora.document.RSSReader;
|
import net.yacy.cora.document.RSSReader;
|
||||||
import net.yacy.cora.protocol.http.HTTPConnector;
|
import net.yacy.cora.protocol.http.HTTPConnector;
|
||||||
import net.yacy.cora.services.Search;
|
import net.yacy.cora.services.SearchSRURSS;
|
||||||
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
import net.yacy.kelondro.data.meta.URIMetadataRow;
|
||||||
import net.yacy.kelondro.data.word.Word;
|
import net.yacy.kelondro.data.word.Word;
|
||||||
import net.yacy.kelondro.data.word.WordReference;
|
import net.yacy.kelondro.data.word.WordReference;
|
||||||
|
@ -372,7 +372,7 @@ public final class yacyClient {
|
||||||
public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException {
|
public static RSSFeed search(final yacySeed targetSeed, String query, boolean verify, boolean global, long timeout, int startRecord, int maximumRecords) throws IOException {
|
||||||
String address = (targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") : targetSeed.getClusterAddress();
|
String address = (targetSeed == null || targetSeed == Switchboard.getSwitchboard().peers.mySeed()) ? "localhost:" + Switchboard.getSwitchboard().getConfig("port", "8080") : targetSeed.getClusterAddress();
|
||||||
String urlBase = "http://" + address + "/yacysearch.rss";
|
String urlBase = "http://" + address + "/yacysearch.rss";
|
||||||
return Search.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global);
|
return SearchSRURSS.loadSRURSS(urlBase, query, timeout, startRecord, maximumRecords, verify, global);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
|
|
@ -45,6 +45,24 @@ public class RSSFeed implements Iterable<RSSMessage> {
|
||||||
this.maxsize = maxsize;
|
this.maxsize = maxsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* make a RSS feed using a set of urls
|
||||||
|
* the source string is assigned to all messages as author to mark the messages' origin
|
||||||
|
* @param links
|
||||||
|
* @param source
|
||||||
|
*/
|
||||||
|
public RSSFeed(Set<MultiProtocolURI> links, String source) {
|
||||||
|
this(Integer.MAX_VALUE);
|
||||||
|
String u;
|
||||||
|
RSSMessage message;
|
||||||
|
for (MultiProtocolURI uri: links) {
|
||||||
|
u = uri.toNormalform(true, false);
|
||||||
|
message = new RSSMessage(u, "", u);
|
||||||
|
message.setAuthor(source);
|
||||||
|
this.addMessage(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void setChannel(final RSSMessage channelItem) {
|
public void setChannel(final RSSMessage channelItem) {
|
||||||
this.channel = channelItem;
|
this.channel = channelItem;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
package net.yacy.cora.document;
|
package net.yacy.cora.document;
|
||||||
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -31,7 +32,7 @@ import net.yacy.cora.date.GenericFormatter;
|
||||||
import net.yacy.cora.date.ISO8601Formatter;
|
import net.yacy.cora.date.ISO8601Formatter;
|
||||||
import net.yacy.cora.protocol.HeaderFramework;
|
import net.yacy.cora.protocol.HeaderFramework;
|
||||||
|
|
||||||
public class RSSMessage implements Hit {
|
public class RSSMessage implements Hit, Comparable<RSSMessage>, Comparator<RSSMessage> {
|
||||||
|
|
||||||
public static enum Token {
|
public static enum Token {
|
||||||
|
|
||||||
|
@ -58,13 +59,13 @@ public class RSSMessage implements Hit {
|
||||||
for (String s: k) this.keys.add(s);
|
for (String s: k) this.keys.add(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String valueFrom(Map<String, String> map) {
|
public String valueFrom(Map<String, String> map, String dflt) {
|
||||||
String value;
|
String value;
|
||||||
for (String key: this.keys) {
|
for (String key: this.keys) {
|
||||||
value = map.get(key);
|
value = map.get(key);
|
||||||
if (value != null) return value;
|
if (value != null) return value;
|
||||||
}
|
}
|
||||||
return "";
|
return dflt;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Set<String> keys() {
|
public Set<String> keys() {
|
||||||
|
@ -107,31 +108,50 @@ public class RSSMessage implements Hit {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTitle() {
|
public String getTitle() {
|
||||||
return Token.title.valueFrom(this.map);
|
return Token.title.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLink() {
|
public String getLink() {
|
||||||
return Token.link.valueFrom(this.map);
|
return Token.link.valueFrom(this.map, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
return (o instanceof RSSMessage) && ((RSSMessage) o).getLink().equals(this.getLink());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int hashCode() {
|
||||||
|
return getLink().hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(RSSMessage o) {
|
||||||
|
if (!(o instanceof RSSMessage)) return 1;
|
||||||
|
return this.getLink().compareTo(o.getLink());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compare(RSSMessage o1, RSSMessage o2) {
|
||||||
|
return o1.compareTo(o2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDescription() {
|
public String getDescription() {
|
||||||
return Token.description.valueFrom(this.map);
|
return Token.description.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAuthor() {
|
public String getAuthor() {
|
||||||
return Token.author.valueFrom(this.map);
|
return Token.author.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCopyright() {
|
public String getCopyright() {
|
||||||
return Token.copyright.valueFrom(this.map);
|
return Token.copyright.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCategory() {
|
public String getCategory() {
|
||||||
return Token.category.valueFrom(this.map);
|
return Token.category.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getSubject() {
|
public String[] getSubject() {
|
||||||
String subject = Token.subject.valueFrom(this.map);
|
String subject = Token.subject.valueFrom(this.map, "");
|
||||||
if (subject.indexOf(',') >= 0) return subject.split(",");
|
if (subject.indexOf(',') >= 0) return subject.split(",");
|
||||||
if (subject.indexOf(';') >= 0) return subject.split(";");
|
if (subject.indexOf(';') >= 0) return subject.split(";");
|
||||||
if (subject.indexOf('|') >= 0) return subject.split("|");
|
if (subject.indexOf('|') >= 0) return subject.split("|");
|
||||||
|
@ -139,15 +159,15 @@ public class RSSMessage implements Hit {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getReferrer() {
|
public String getReferrer() {
|
||||||
return Token.referrer.valueFrom(this.map);
|
return Token.referrer.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLanguage() {
|
public String getLanguage() {
|
||||||
return Token.language.valueFrom(this.map);
|
return Token.language.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Date getPubDate() {
|
public Date getPubDate() {
|
||||||
String dateString = Token.pubDate.valueFrom(this.map);
|
String dateString = Token.pubDate.valueFrom(this.map, "");
|
||||||
Date date;
|
Date date;
|
||||||
try {
|
try {
|
||||||
date = ISO8601Formatter.FORMATTER.parse(dateString);
|
date = ISO8601Formatter.FORMATTER.parse(dateString);
|
||||||
|
@ -162,20 +182,20 @@ public class RSSMessage implements Hit {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getGuid() {
|
public String getGuid() {
|
||||||
return Token.guid.valueFrom(this.map);
|
return Token.guid.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTTL() {
|
public String getTTL() {
|
||||||
return Token.ttl.valueFrom(this.map);
|
return Token.ttl.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDocs() {
|
public String getDocs() {
|
||||||
return Token.docs.valueFrom(this.map);
|
return Token.docs.valueFrom(this.map, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getSize() {
|
public long getSize() {
|
||||||
String size = Token.size.valueFrom(this.map);
|
String size = Token.size.valueFrom(this.map, "-1");
|
||||||
return (size == null || size.length() == 0) ? 0 : Long.parseLong(size);
|
return (size == null || size.length() == 0) ? -1 : Long.parseLong(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getFulltext() {
|
public String getFulltext() {
|
||||||
|
|
|
@ -1,313 +0,0 @@
|
||||||
/**
|
|
||||||
* Search
|
|
||||||
* Copyright 2010 by Michael Peter Christen
|
|
||||||
* First released 25.05.2010 at http://yacy.net
|
|
||||||
*
|
|
||||||
* This library is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This library is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public License
|
|
||||||
* along with this program in the file lgpl21.txt
|
|
||||||
* If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package net.yacy.cora.services;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.BlockingQueue;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import net.yacy.cora.document.MultiProtocolURI;
|
|
||||||
import net.yacy.cora.document.RSSFeed;
|
|
||||||
import net.yacy.cora.document.RSSMessage;
|
|
||||||
import net.yacy.cora.document.RSSReader;
|
|
||||||
import net.yacy.cora.protocol.HeaderFramework;
|
|
||||||
import net.yacy.cora.protocol.RequestHeader;
|
|
||||||
import net.yacy.cora.protocol.http.HTTPClient;
|
|
||||||
import net.yacy.cora.protocol.http.HTTPConnector;
|
|
||||||
import net.yacy.cora.protocol.http.LinkExtractor;
|
|
||||||
import net.yacy.cora.storage.ScoreMap;
|
|
||||||
|
|
||||||
import org.apache.http.entity.mime.content.ContentBody;
|
|
||||||
import org.apache.http.entity.mime.content.StringBody;
|
|
||||||
|
|
||||||
public class Search extends Thread {
|
|
||||||
|
|
||||||
private final static int recordsPerSession = 10;
|
|
||||||
|
|
||||||
public static final String[] SRURSSServicesList = {
|
|
||||||
"http://yacy.dyndns.org:8000/yacysearch.rss",
|
|
||||||
"http://yacy.caloulinux.net:8085/yacysearch.rss",
|
|
||||||
"http://algire.dyndns.org:8085/yacysearch.rss",
|
|
||||||
"http://breyvogel.dyndns.org:8002/yacysearch.rss"
|
|
||||||
};
|
|
||||||
|
|
||||||
public static final String[] genericServicesList = {
|
|
||||||
"http://www.scroogle.org/cgi-bin/nbbw.cgi?Gw=$&n=2",
|
|
||||||
"http://blekko.com/ws/$+/rss",
|
|
||||||
"http://www.bing.com/search?q=$&format=rss",
|
|
||||||
"http://search.twitter.com/search.atom?q=$"
|
|
||||||
};
|
|
||||||
|
|
||||||
public static Thread accumulateSRURSS(
|
|
||||||
final String urlBase,
|
|
||||||
final String query,
|
|
||||||
final long timeoutInit,
|
|
||||||
final int maximumRecordsInit,
|
|
||||||
final boolean verify,
|
|
||||||
final boolean global,
|
|
||||||
final Map<MultiProtocolURI, List<Integer>> result) {
|
|
||||||
Thread t = new Thread() {
|
|
||||||
BlockingQueue<RSSMessage> results = new LinkedBlockingQueue<RSSMessage>();
|
|
||||||
public void run() {
|
|
||||||
searchSRURSS(urlBase, query, timeoutInit, maximumRecordsInit, verify, global, results);
|
|
||||||
int p = 1;
|
|
||||||
RSSMessage message;
|
|
||||||
try {
|
|
||||||
while ((message = results.poll(timeoutInit, TimeUnit.MILLISECONDS)) != RSSMessage.POISON) {
|
|
||||||
MultiProtocolURI uri;
|
|
||||||
if (message == null) break;
|
|
||||||
try {
|
|
||||||
uri = new MultiProtocolURI(message.getLink());
|
|
||||||
List<Integer> m = result.get(uri);
|
|
||||||
if (m == null) m = new ArrayList<Integer>();
|
|
||||||
m.add(new Integer(p++));
|
|
||||||
result.put(uri, m);
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
t.start();
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Thread searchSRURSS(
|
|
||||||
final String urlBase,
|
|
||||||
final String query,
|
|
||||||
final long timeoutInit,
|
|
||||||
final int maximumRecordsInit,
|
|
||||||
final boolean verify,
|
|
||||||
final boolean global,
|
|
||||||
final BlockingQueue<RSSMessage> queue) {
|
|
||||||
Thread job = new Thread() {
|
|
||||||
public void run() {
|
|
||||||
int startRecord = 0;
|
|
||||||
RSSMessage message;
|
|
||||||
int maximumRecords = maximumRecordsInit;
|
|
||||||
long timeout = timeoutInit;
|
|
||||||
mainloop: while (timeout > 0 && maximumRecords > 0) {
|
|
||||||
long st = System.currentTimeMillis();
|
|
||||||
RSSFeed feed;
|
|
||||||
try {
|
|
||||||
feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global);
|
|
||||||
} catch (IOException e1) {
|
|
||||||
break mainloop;
|
|
||||||
}
|
|
||||||
if (feed == null || feed.isEmpty()) break mainloop;
|
|
||||||
maximumRecords -= feed.size();
|
|
||||||
innerloop: while (!feed.isEmpty()) {
|
|
||||||
message = feed.pollMessage();
|
|
||||||
if (message == null) break innerloop;
|
|
||||||
try {
|
|
||||||
queue.put(message);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
break innerloop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
startRecord += recordsPerSession;
|
|
||||||
timeout -= System.currentTimeMillis() - st;
|
|
||||||
}
|
|
||||||
try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
job.start();
|
|
||||||
return job;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* send a query to a yacy public search interface
|
|
||||||
* @param rssSearchServiceURL the target url base (everything before the ? that follows the SRU request syntax properties). can null, then the local peer is used
|
|
||||||
* @param query the query as string
|
|
||||||
* @param startRecord number of first record
|
|
||||||
* @param maximumRecords maximum number of records
|
|
||||||
* @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
|
|
||||||
* @param global if true also search results from other peers are included
|
|
||||||
* @param timeout milliseconds that are waited at maximum for a search result
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static RSSFeed loadSRURSS(
|
|
||||||
String rssSearchServiceURL,
|
|
||||||
String query,
|
|
||||||
long timeout,
|
|
||||||
int startRecord,
|
|
||||||
int maximumRecords,
|
|
||||||
boolean verify,
|
|
||||||
boolean global) throws IOException {
|
|
||||||
MultiProtocolURI uri = null;
|
|
||||||
try {
|
|
||||||
uri = new MultiProtocolURI(rssSearchServiceURL);
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
throw new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
// send request
|
|
||||||
try {
|
|
||||||
final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
|
|
||||||
parts.put("query", new StringBody(query));
|
|
||||||
parts.put("startRecord", new StringBody(Integer.toString(startRecord)));
|
|
||||||
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
|
|
||||||
parts.put("verify", new StringBody(verify ? "true" : "false"));
|
|
||||||
parts.put("resource", new StringBody(global ? "global" : "local"));
|
|
||||||
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
|
|
||||||
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
|
|
||||||
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
|
|
||||||
if (reader == null) {
|
|
||||||
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
|
|
||||||
}
|
|
||||||
final RSSFeed feed = reader.getFeed();
|
|
||||||
if (feed == null) {
|
|
||||||
// case where the rss reader does not understand the content
|
|
||||||
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (2)");
|
|
||||||
}
|
|
||||||
return feed;
|
|
||||||
} catch (final IOException e) {
|
|
||||||
throw new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Thread accumulateGeneric(
|
|
||||||
String query,
|
|
||||||
String service,
|
|
||||||
final Map<MultiProtocolURI, List<Integer>> result,
|
|
||||||
final int timeout) {
|
|
||||||
query = query.replace(' ', '+');
|
|
||||||
final String servicePatched = service.replaceAll("\\$", query);
|
|
||||||
Thread t = new Thread() {
|
|
||||||
public void run() {
|
|
||||||
try {
|
|
||||||
MultiProtocolURI[] sr = loadGeneric(new MultiProtocolURI(servicePatched), timeout);
|
|
||||||
int p = 1;
|
|
||||||
for (MultiProtocolURI u: sr) {
|
|
||||||
List<Integer> m = result.get(u);
|
|
||||||
if (m == null) m = new ArrayList<Integer>();
|
|
||||||
m.add(new Integer(p++));
|
|
||||||
result.put(u, m);
|
|
||||||
}
|
|
||||||
} catch (MalformedURLException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
t.start();
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static MultiProtocolURI[] loadGeneric(MultiProtocolURI uri, long timeout) throws IOException {
|
|
||||||
final RequestHeader requestHeader = new RequestHeader();
|
|
||||||
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
|
|
||||||
final HTTPClient client = new HTTPClient();
|
|
||||||
client.setTimout((int) timeout);
|
|
||||||
client.setHeader(requestHeader.entrySet());
|
|
||||||
byte[] result = client.GETbytes(uri.toString());
|
|
||||||
client.finish();
|
|
||||||
if (client.getStatusCode() != 200) {
|
|
||||||
throw new IOException("Server returned status: " + client.getHttpResponse().getStatusLine());
|
|
||||||
}
|
|
||||||
if (result == null) throw new IOException("cora.Search error asking peer '" + uri.getHost() + "': null");
|
|
||||||
LinkExtractor le = new LinkExtractor(Pattern.compile(".*" + uri.getHost() + ".*"));
|
|
||||||
le.scrape(new String(result));
|
|
||||||
MultiProtocolURI[] links = le.getLinks();
|
|
||||||
return links;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static RSSFeed links2feed(Set<MultiProtocolURI> links, String source) {
|
|
||||||
RSSFeed feed = new RSSFeed(Integer.MAX_VALUE);
|
|
||||||
String u;
|
|
||||||
RSSMessage message;
|
|
||||||
for (MultiProtocolURI uri: links) {
|
|
||||||
u = uri.toNormalform(true, false);
|
|
||||||
message = new RSSMessage(u, "", u);
|
|
||||||
message.setAuthor(source);
|
|
||||||
feed.addMessage(message);
|
|
||||||
}
|
|
||||||
return feed;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Map<MultiProtocolURI, List<Integer>> result;
|
|
||||||
private String query;
|
|
||||||
private int count;
|
|
||||||
private String[] yacyServices, rssServices, genericServices;
|
|
||||||
private List<Thread> threads;
|
|
||||||
|
|
||||||
public Search(String query, int count, String[] rssServices, String[] genericServices) {
|
|
||||||
this.result = new ConcurrentHashMap<MultiProtocolURI, List<Integer>>();
|
|
||||||
this.query = query;
|
|
||||||
this.count = count;
|
|
||||||
this.yacyServices = yacyServices;
|
|
||||||
this.rssServices = rssServices;
|
|
||||||
this.genericServices = genericServices;
|
|
||||||
this.threads = new ArrayList<Thread>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void run() {
|
|
||||||
for (String service: this.rssServices) threads.add(accumulateSRURSS(service, this.query, 10000, this.count, false, true, this.result));
|
|
||||||
for (String service: this.genericServices) threads.add(accumulateGeneric(this.query, service, this.result, 10000));
|
|
||||||
}
|
|
||||||
|
|
||||||
public ScoreMap<MultiProtocolURI> getResults() {
|
|
||||||
ScoreMap<MultiProtocolURI> scores = new ScoreMap<MultiProtocolURI>();
|
|
||||||
int m = this.rssServices.length + this.genericServices.length;
|
|
||||||
for (Map.Entry<MultiProtocolURI, List<Integer>> entry: this.result.entrySet()) {
|
|
||||||
int a = 0;
|
|
||||||
for (Integer i : entry.getValue()) a += i.intValue();
|
|
||||||
scores.inc(entry.getKey(), a * m / entry.getValue().size());
|
|
||||||
}
|
|
||||||
return scores;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void waitTermination() {
|
|
||||||
for (Thread t: threads) try {t.join();} catch (InterruptedException e) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
for (String s: args) sb.append(s).append(' ');
|
|
||||||
String query = sb.toString().trim();
|
|
||||||
Search search = new Search(query, 100, SRURSSServicesList, genericServicesList);
|
|
||||||
search.start();
|
|
||||||
try {Thread.sleep(100);} catch (InterruptedException e1) {}
|
|
||||||
search.waitTermination();
|
|
||||||
ScoreMap<MultiProtocolURI> result = search.getResults();
|
|
||||||
Iterator<MultiProtocolURI> i = result.keys(true);
|
|
||||||
MultiProtocolURI u;
|
|
||||||
while (i.hasNext()) {
|
|
||||||
u = i.next();
|
|
||||||
System.out.println("[" + result.get(u) + "] " + u.toNormalform(true, false));
|
|
||||||
}
|
|
||||||
try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) {}
|
|
||||||
}
|
|
||||||
}
|
|
40
source/net/yacy/cora/services/SearchAccumulator.java
Normal file
40
source/net/yacy/cora/services/SearchAccumulator.java
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/**
|
||||||
|
* Accumulator
|
||||||
|
* Copyright 2010 by Michael Peter Christen
|
||||||
|
* First released 07.01.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.services;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* place-holder class to provide a object declaration for threads in Search object
|
||||||
|
*/
|
||||||
|
public interface SearchAccumulator extends Runnable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* join this accumulator: wait until it terminates
|
||||||
|
* @throws InterruptedException
|
||||||
|
*/
|
||||||
|
public void join() throws InterruptedException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test if the accumulator is still running
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean isAlive();
|
||||||
|
|
||||||
|
}
|
164
source/net/yacy/cora/services/SearchHub.java
Normal file
164
source/net/yacy/cora/services/SearchHub.java
Normal file
|
@ -0,0 +1,164 @@
|
||||||
|
/**
|
||||||
|
* Search
|
||||||
|
* Copyright 2010 by Michael Peter Christen
|
||||||
|
* First released 25.05.2010 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General private
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General private License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General private License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.services;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.RSSMessage;
|
||||||
|
import net.yacy.cora.protocol.http.HTTPClient;
|
||||||
|
import net.yacy.cora.storage.ScoreMap;
|
||||||
|
|
||||||
|
public class SearchHub {
|
||||||
|
|
||||||
|
private static final String[] SRURSSServicesList = {
|
||||||
|
"http://yacy.dyndns.org:8000/yacysearch.rss",
|
||||||
|
"http://yacy.caloulinux.net:8085/yacysearch.rss",
|
||||||
|
"http://algire.dyndns.org:8085/yacysearch.rss",
|
||||||
|
"http://breyvogel.dyndns.org:8002/yacysearch.rss"
|
||||||
|
};
|
||||||
|
|
||||||
|
public final static SearchHub EMPTY = new SearchHub("", 0);
|
||||||
|
|
||||||
|
private String query;
|
||||||
|
private int timeout;
|
||||||
|
private List<SearchAccumulator> threads;
|
||||||
|
private Map<RSSMessage, List<Integer>> result;
|
||||||
|
|
||||||
|
public SearchHub(final String query, final int timeout) {
|
||||||
|
this.query = query;
|
||||||
|
this.timeout = timeout;
|
||||||
|
this.threads = new ArrayList<SearchAccumulator>();
|
||||||
|
this.result = new ConcurrentHashMap<RSSMessage, List<Integer>>();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the result of the accumulation
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Map<RSSMessage, List<Integer>> getAccumulation() {
|
||||||
|
return this.result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add an accumulator to the list of accumulation theads.
|
||||||
|
* this is mainly used for awaitTermination() and isTerminated()
|
||||||
|
* @param a
|
||||||
|
*/
|
||||||
|
public void addAccumulator(SearchAccumulator a) {
|
||||||
|
this.threads.add(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the original query string
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public String getQuery() {
|
||||||
|
return this.query;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the given time-out of the search request
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int getTimeout() {
|
||||||
|
return this.timeout;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the list of search results as scored map.
|
||||||
|
* The results are combined using their appearance positions.
|
||||||
|
* Every time this method is called the list is re-computed to reflect the latest results
|
||||||
|
* @return a score map of urls
|
||||||
|
*/
|
||||||
|
public ScoreMap<String> getResults() {
|
||||||
|
ScoreMap<String> scores = new ScoreMap<String>();
|
||||||
|
int m = threads.size();
|
||||||
|
for (Map.Entry<RSSMessage, List<Integer>> entry: this.result.entrySet()) {
|
||||||
|
int a = 0;
|
||||||
|
for (Integer i : entry.getValue()) a += i.intValue();
|
||||||
|
scores.inc(entry.getKey().getLink(), a * m / entry.getValue().size());
|
||||||
|
}
|
||||||
|
return scores;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* wait until all accumulation threads have terminated
|
||||||
|
*/
|
||||||
|
public void waitTermination() {
|
||||||
|
for (SearchAccumulator t: threads) try {t.join();} catch (InterruptedException e) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return true if all accumulation threads have terminated
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean isTerminated() {
|
||||||
|
for (SearchAccumulator t: threads) if (t.isAlive()) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return a hash code of the search hub.
|
||||||
|
* This is computed using only the query string because that identifies the object
|
||||||
|
*/
|
||||||
|
public int hashCode() {
|
||||||
|
return query.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test method to add a list of SRU RSS services.
|
||||||
|
* such services are provided by YaCy peers
|
||||||
|
* @param search
|
||||||
|
* @param rssServices
|
||||||
|
* @param count
|
||||||
|
* @param verify
|
||||||
|
* @param global
|
||||||
|
*/
|
||||||
|
public static void addSRURSSServices(SearchHub search, String[] rssServices, int count, boolean verify, boolean global) {
|
||||||
|
for (String service: rssServices) {
|
||||||
|
SearchSRURSS accumulator = new SearchSRURSS(search, service, count, verify, global);
|
||||||
|
accumulator.start();
|
||||||
|
search.addAccumulator(accumulator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (String s: args) sb.append(s).append(' ');
|
||||||
|
String query = sb.toString().trim();
|
||||||
|
SearchHub search = new SearchHub(query, 10000);
|
||||||
|
addSRURSSServices(search, SRURSSServicesList, 100, false, false);
|
||||||
|
try {Thread.sleep(100);} catch (InterruptedException e1) {}
|
||||||
|
search.waitTermination();
|
||||||
|
ScoreMap<String> result = search.getResults();
|
||||||
|
Iterator<String> i = result.keys(true);
|
||||||
|
String u;
|
||||||
|
while (i.hasNext()) {
|
||||||
|
u = i.next();
|
||||||
|
System.out.println("[" + result.get(u) + "] " + u);
|
||||||
|
}
|
||||||
|
try {HTTPClient.closeConnectionManager();} catch (InterruptedException e) {}
|
||||||
|
}
|
||||||
|
}
|
201
source/net/yacy/cora/services/SearchSRURSS.java
Normal file
201
source/net/yacy/cora/services/SearchSRURSS.java
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
/**
|
||||||
|
* AccumulateSRURSS
|
||||||
|
* Copyright 2010 by Michael Peter Christen
|
||||||
|
* First released 06.01.2011 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.services;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.entity.mime.content.ContentBody;
|
||||||
|
import org.apache.http.entity.mime.content.StringBody;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.MultiProtocolURI;
|
||||||
|
import net.yacy.cora.document.RSSFeed;
|
||||||
|
import net.yacy.cora.document.RSSMessage;
|
||||||
|
import net.yacy.cora.document.RSSReader;
|
||||||
|
import net.yacy.cora.protocol.http.HTTPConnector;
|
||||||
|
|
||||||
|
public class SearchSRURSS extends Thread implements SearchAccumulator {
|
||||||
|
|
||||||
|
private final static int recordsPerSession = 10;
|
||||||
|
|
||||||
|
final String urlBase;
|
||||||
|
final String query;
|
||||||
|
final long timeoutInit;
|
||||||
|
final int maximumRecordsInit;
|
||||||
|
final boolean verify;
|
||||||
|
final boolean global;
|
||||||
|
final Map<RSSMessage, List<Integer>> result;
|
||||||
|
|
||||||
|
private final BlockingQueue<RSSMessage> results;
|
||||||
|
|
||||||
|
public SearchSRURSS(
|
||||||
|
final Map<RSSMessage, List<Integer>> result,
|
||||||
|
final String query,
|
||||||
|
final long timeoutInit,
|
||||||
|
final String urlBase,
|
||||||
|
final int maximumRecordsInit,
|
||||||
|
final boolean verify,
|
||||||
|
final boolean global) {
|
||||||
|
this.results = new LinkedBlockingQueue<RSSMessage>();
|
||||||
|
this.result = result;
|
||||||
|
this.query = query;
|
||||||
|
this.timeoutInit = timeoutInit;
|
||||||
|
this.urlBase = urlBase;
|
||||||
|
this.maximumRecordsInit = maximumRecordsInit;
|
||||||
|
this.verify = verify;
|
||||||
|
this.global = global;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchSRURSS(
|
||||||
|
final SearchHub search,
|
||||||
|
final String urlBase,
|
||||||
|
final int maximumRecordsInit,
|
||||||
|
final boolean verify,
|
||||||
|
final boolean global) {
|
||||||
|
this.results = new LinkedBlockingQueue<RSSMessage>();
|
||||||
|
this.result = search.getAccumulation();
|
||||||
|
this.query = search.getQuery();
|
||||||
|
this.timeoutInit = search.getTimeout();
|
||||||
|
this.urlBase = urlBase;
|
||||||
|
this.maximumRecordsInit = maximumRecordsInit;
|
||||||
|
this.verify = verify;
|
||||||
|
this.global = global;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
searchSRURSS(results, urlBase, query, timeoutInit, maximumRecordsInit, verify, global);
|
||||||
|
int p = 1;
|
||||||
|
RSSMessage message;
|
||||||
|
try {
|
||||||
|
while ((message = results.poll(timeoutInit, TimeUnit.MILLISECONDS)) != RSSMessage.POISON) {
|
||||||
|
if (message == null) break;
|
||||||
|
List<Integer> m = result.get(message.getLink());
|
||||||
|
if (m == null) m = new ArrayList<Integer>();
|
||||||
|
m.add(new Integer(p++));
|
||||||
|
result.put(message, m);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Thread searchSRURSS(
|
||||||
|
final BlockingQueue<RSSMessage> queue,
|
||||||
|
final String urlBase,
|
||||||
|
final String query,
|
||||||
|
final long timeoutInit,
|
||||||
|
final int maximumRecordsInit,
|
||||||
|
final boolean verify,
|
||||||
|
final boolean global) {
|
||||||
|
Thread job = new Thread() {
|
||||||
|
public void run() {
|
||||||
|
int startRecord = 0;
|
||||||
|
RSSMessage message;
|
||||||
|
int maximumRecords = maximumRecordsInit;
|
||||||
|
long timeout = timeoutInit;
|
||||||
|
mainloop: while (timeout > 0 && maximumRecords > 0) {
|
||||||
|
long st = System.currentTimeMillis();
|
||||||
|
RSSFeed feed;
|
||||||
|
try {
|
||||||
|
feed = loadSRURSS(urlBase, query, timeout, startRecord, recordsPerSession, verify, global);
|
||||||
|
} catch (IOException e1) {
|
||||||
|
break mainloop;
|
||||||
|
}
|
||||||
|
if (feed == null || feed.isEmpty()) break mainloop;
|
||||||
|
maximumRecords -= feed.size();
|
||||||
|
innerloop: while (!feed.isEmpty()) {
|
||||||
|
message = feed.pollMessage();
|
||||||
|
if (message == null) break innerloop;
|
||||||
|
try {
|
||||||
|
queue.put(message);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
break innerloop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
startRecord += recordsPerSession;
|
||||||
|
timeout -= System.currentTimeMillis() - st;
|
||||||
|
}
|
||||||
|
try { queue.put(RSSMessage.POISON); } catch (InterruptedException e) {}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
job.start();
|
||||||
|
return job;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* send a query to a yacy public search interface
|
||||||
|
* @param rssSearchServiceURL the target url base (everything before the ? that follows the SRU request syntax properties). can null, then the local peer is used
|
||||||
|
* @param query the query as string
|
||||||
|
* @param startRecord number of first record
|
||||||
|
* @param maximumRecords maximum number of records
|
||||||
|
* @param verify if true, result entries are verified using the snippet fetch (slow); if false simply the result is returned
|
||||||
|
* @param global if true also search results from other peers are included
|
||||||
|
* @param timeout milliseconds that are waited at maximum for a search result
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static RSSFeed loadSRURSS(
|
||||||
|
String rssSearchServiceURL,
|
||||||
|
String query,
|
||||||
|
long timeout,
|
||||||
|
int startRecord,
|
||||||
|
int maximumRecords,
|
||||||
|
boolean verify,
|
||||||
|
boolean global) throws IOException {
|
||||||
|
MultiProtocolURI uri = null;
|
||||||
|
try {
|
||||||
|
uri = new MultiProtocolURI(rssSearchServiceURL);
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
throw new IOException("cora.Search failed asking peer '" + rssSearchServiceURL + "': bad url, " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// send request
|
||||||
|
try {
|
||||||
|
final LinkedHashMap<String,ContentBody> parts = new LinkedHashMap<String,ContentBody>();
|
||||||
|
parts.put("query", new StringBody(query));
|
||||||
|
parts.put("startRecord", new StringBody(Integer.toString(startRecord)));
|
||||||
|
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
|
||||||
|
parts.put("verify", new StringBody(verify ? "true" : "false"));
|
||||||
|
parts.put("resource", new StringBody(global ? "global" : "local"));
|
||||||
|
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
|
||||||
|
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
|
||||||
|
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
|
||||||
|
if (reader == null) {
|
||||||
|
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (1), reader == null");
|
||||||
|
}
|
||||||
|
final RSSFeed feed = reader.getFeed();
|
||||||
|
if (feed == null) {
|
||||||
|
// case where the rss reader does not understand the content
|
||||||
|
throw new IOException("cora.Search failed asking peer '" + uri.getHost() + "': probably bad response from remote peer (2)");
|
||||||
|
}
|
||||||
|
return feed;
|
||||||
|
} catch (final IOException e) {
|
||||||
|
throw new IOException("cora.Search error asking peer '" + uri.getHost() + "':" + e.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -166,8 +166,7 @@ public class SnippetExtractor {
|
||||||
assert maxpos >= minpos;
|
assert maxpos >= minpos;
|
||||||
final int newlen = Math.max(10, maxpos - minpos + 10);
|
final int newlen = Math.max(10, maxpos - minpos + 10);
|
||||||
final int around = (maxLength - newlen) / 2;
|
final int around = (maxLength - newlen) / 2;
|
||||||
assert minpos - around < sentence.length() : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
|
assert minpos - around < sentence.length() : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length(); //maxpos = 435, minpos = 17, around = -124, sentence.length() = 44
|
||||||
//assert ((maxpos + around) <= sentence.length()) && ((maxpos + around) <= sentence.length()) : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
|
|
||||||
sentence = "[..] " + sentence.substring(minpos - around, ((maxpos + around) > sentence.length()) ? sentence.length() : (maxpos + around)).trim() + " [..]";
|
sentence = "[..] " + sentence.substring(minpos - around, ((maxpos + around) > sentence.length()) ? sentence.length() : (maxpos + around)).trim() + " [..]";
|
||||||
minpos = around;
|
minpos = around;
|
||||||
maxpos = sentence.length() - around - 5;
|
maxpos = sentence.length() - around - 5;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user