mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
allow url parameter in worktable apicall
allow url=wwwl?param=a&param=b (with ?, & encoded); fix: http://mantis.tokeek.de/view.php?id=100; fix double adding of '&' in MultiProtocolURL.escape()
This commit is contained in:
parent
b5ca20de15
commit
209e0f2fe8
|
@ -36,6 +36,7 @@ import net.yacy.cora.document.feed.RSSFeed;
|
|||
import net.yacy.cora.document.feed.RSSMessage;
|
||||
import net.yacy.cora.document.feed.RSSReader;
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
|
@ -212,7 +213,7 @@ public class Load_RSS_p {
|
|||
final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
|
||||
prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK()));
|
||||
prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
|
||||
prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
|
||||
prop.put("showscheduledfeeds_list_" + apic + "_rss", MultiProtocolURL.escape(messageurl).toString());
|
||||
prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
|
||||
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
|
||||
prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));
|
||||
|
|
|
@ -66,8 +66,6 @@ import net.yacy.crawler.retrieval.Response;
|
|||
*/
|
||||
public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
|
||||
|
||||
public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
|
||||
|
||||
private static final long serialVersionUID = -1173233022912141884L;
|
||||
private static final long SMB_TIMEOUT = 5000;
|
||||
|
||||
|
@ -373,6 +371,11 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
|
|||
escape();
|
||||
}
|
||||
|
||||
/**
|
||||
* creates MultiProtocolURL
|
||||
* if path contains '?' search part is automatically created by splitting input into path and searchpart
|
||||
* the same applies to an anchor ('#')
|
||||
*/
|
||||
public MultiProtocolURL(final String protocol, String host, final int port, final String path) throws MalformedURLException {
|
||||
if (protocol == null) throw new MalformedURLException("protocol is null");
|
||||
if (host.indexOf(':') >= 0 && host.charAt(0) != '[') host = '[' + host + ']'; // IPv6 host must be enclosed in square brackets
|
||||
|
@ -521,9 +524,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
|
|||
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
|
||||
sbuf.append((char)ch); // leave it that way, it is used the right way
|
||||
} else {
|
||||
sbuf.append("&"); // this must be urlencoded
|
||||
sbuf.append("%26"); // this must be urlencoded
|
||||
}
|
||||
sbuf.append((char)ch);
|
||||
} else if (ch == '#') { // RFC 1738 2.2 unsafe char is _not_ encoded because it may already be used for encoding
|
||||
sbuf.append((char)ch);
|
||||
} else if (ch == '!' || ch == ':' // unreserved
|
||||
|
|
|
@ -28,6 +28,7 @@ package net.yacy.data;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
|
@ -42,6 +43,7 @@ import net.yacy.cora.date.GenericFormatter;
|
|||
import net.yacy.cora.document.encoding.ASCII;
|
||||
import net.yacy.cora.document.encoding.UTF8;
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||
import net.yacy.cora.order.Base64Order;
|
||||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
|
@ -222,7 +224,6 @@ public class WorkTables extends Tables {
|
|||
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
|
||||
client.setTimout(120000);
|
||||
Tables.Row row;
|
||||
String url;
|
||||
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
|
||||
for (final String pk: pks) {
|
||||
row = null;
|
||||
|
@ -234,15 +235,20 @@ public class WorkTables extends Tables {
|
|||
ConcurrentLog.logException(e);
|
||||
}
|
||||
if (row == null) continue;
|
||||
url = "http://" + host + ":" + port + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL));
|
||||
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
|
||||
ConcurrentLog.info("WorkTables", "executing url: " + url);
|
||||
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
|
||||
try {
|
||||
client.GETbytes(url, username, pass, false);
|
||||
l.put(url, client.getStatusCode());
|
||||
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
|
||||
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
|
||||
ConcurrentLog.info("WorkTables", "executing url: " + url.toString());
|
||||
try {
|
||||
client.GETbytes(url, username, pass, false); // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
|
||||
l.put(url.toString(), client.getStatusCode());
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
l.put(url, -1);
|
||||
l.put(url.toString(), -1);
|
||||
}
|
||||
} catch (MalformedURLException ex) {
|
||||
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
|
||||
}
|
||||
}
|
||||
return l;
|
||||
|
|
Loading…
Reference in New Issue
Block a user