diff --git a/defaults/heuristicopensearch.conf b/defaults/heuristicopensearch.conf index 712674aba..6a3b494b9 100644 --- a/defaults/heuristicopensearch.conf +++ b/defaults/heuristicopensearch.conf @@ -12,7 +12,7 @@ ## - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines ## -#Blekko = http://blekko.com/ws/{searchTerms}+/rss # get 20 results from blekko +#Blekko = http://blekko.com/ws/{searchTerms} /rss # get 20 results from blekko #Faroo-News = http://www.faroo.com/api?q={searchTerms}&start={startIndex}&length=20&l=en&src=news&f=rss # get results from Faroo news-search #WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?} #Search WordPress.com Blogs #Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv diff --git a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java index defc0e1cc..038df8779 100644 --- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java +++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java @@ -23,7 +23,9 @@ import java.io.IOException; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedHashMap; import java.util.List; +import net.yacy.cora.document.encoding.ASCII; import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.feed.RSSFeed; import net.yacy.cora.document.feed.RSSMessage; @@ -39,6 +41,7 @@ import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.search.query.QueryParams; import net.yacy.search.schema.CollectionSchema; +import org.apache.http.entity.mime.content.ContentBody; /** * Handling of queries to remote OpenSearch systems. Iterates to a list of @@ -83,10 +86,10 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme // see http://www.loc.gov/standards/sru/ String searchurl = this.parseSearchTemplate(baseurl, query.getQueryGoal().getQueryString(false), 0, query.itemsPerPage); try { - MultiProtocolURL aurl = new MultiProtocolURL(MultiProtocolURL.unescape(searchurl)); + MultiProtocolURL aurl = new MultiProtocolURL(searchurl); try { this.lastaccesstime = System.currentTimeMillis(); - final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyIntranetCrawlerAgent); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent); byte[] result = httpClient.GETbytes(aurl, null, null, false); RSSReader rssReader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); if (rssReader != null) {