fix Umlaut handling in blekko heuristic search term

http://mantis.tokeek.de/view.php?id=169 observation: blekko seems to block xxxbot agents (=0 results)
2024-09-19 00:01:41 +02:00 · 2015-02-08 23:40:33 +01:00 · 2015-02-08 23:40:33 +01:00 · fe6f5a395d
commit fe6f5a395d
parent ab98f69592
2 changed files with 6 additions and 3 deletions
--- a/defaults/heuristicopensearch.conf
+++ b/defaults/heuristicopensearch.conf
@@ -12,7 +12,7 @@
 ## - all lines beginning with '#' and where the second character is not '#' are commented-out keyword lines
 ##

-#Blekko = http://blekko.com/ws/{searchTerms}+/rss  # get 20 results from blekko
+#Blekko = http://blekko.com/ws/{searchTerms} /rss  # get 20 results from blekko
 #Faroo-News = http://www.faroo.com/api?q={searchTerms}&start={startIndex}&length=20&l=en&src=news&f=rss  # get results from Faroo news-search
 #WordPress.com = http://en.search.wordpress.com/?q={searchTerms}&f=feed&page={startPage?}  #Search WordPress.com Blogs
 #Sueddeutsche.de = http://suche.sueddeutsche.de/query/{searchTerms}?output=rss # Sueddeutsche Zeitung Artikel Archiv
--- a/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
+++ b/source/net/yacy/cora/federate/opensearch/OpenSearchConnector.java
@@ -23,7 +23,9 @@ import java.io.IOException;
 import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.LinkedHashMap;
 import java.util.List;
+import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.encoding.UTF8;
 import net.yacy.cora.document.feed.RSSFeed;
 import net.yacy.cora.document.feed.RSSMessage;
@@ -39,6 +41,7 @@ import net.yacy.document.TextParser;
 import net.yacy.kelondro.data.meta.URIMetadataNode;
 import net.yacy.search.query.QueryParams;
 import net.yacy.search.schema.CollectionSchema;
+import org.apache.http.entity.mime.content.ContentBody;

 /**
 * Handling of queries to remote OpenSearch systems. Iterates to a list of
@@ -83,10 +86,10 @@ public class OpenSearchConnector extends AbstractFederateSearchConnector impleme
        // see http://www.loc.gov/standards/sru/
        String searchurl = this.parseSearchTemplate(baseurl, query.getQueryGoal().getQueryString(false), 0, query.itemsPerPage);
        try {
-            MultiProtocolURL aurl = new MultiProtocolURL(MultiProtocolURL.unescape(searchurl));
+            MultiProtocolURL aurl = new MultiProtocolURL(searchurl);
            try {
                this.lastaccesstime = System.currentTimeMillis();
-                final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyIntranetCrawlerAgent);
+                final HTTPClient httpClient = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
                byte[] result = httpClient.GETbytes(aurl, null, null, false);
                RSSReader rssReader =  RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
                if (rssReader != null) {