allow url parameter in worktable apicall

allow url=wwwl?param=a&param=b (with the '?' and '&' URL-encoded)
fix:  http://mantis.tokeek.de/view.php?id=100

fix '&' being appended twice in MultiProtocolURL.escape()
This commit is contained in:
reger 2014-10-04 04:11:48 +02:00
parent b5ca20de15
commit 209e0f2fe8
3 changed files with 24 additions and 15 deletions

View File

@ -36,6 +36,7 @@ import net.yacy.cora.document.feed.RSSFeed;
import net.yacy.cora.document.feed.RSSMessage;
import net.yacy.cora.document.feed.RSSReader;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
@ -212,7 +213,7 @@ public class Load_RSS_p {
final Date date_next_exec = r.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
prop.put("showscheduledfeeds_list_" + apic + "_pk", UTF8.String(row.getPK()));
prop.put("showscheduledfeeds_list_" + apic + "_count", apic);
prop.putXML("showscheduledfeeds_list_" + apic + "_rss", messageurl);
prop.put("showscheduledfeeds_list_" + apic + "_rss", MultiProtocolURL.escape(messageurl).toString());
prop.putXML("showscheduledfeeds_list_" + apic + "_title", row.get("title", ""));
prop.putXML("showscheduledfeeds_list_" + apic + "_referrer", referrer == null ? "#" : referrer.toNormalform(true));
prop.put("showscheduledfeeds_list_" + apic + "_recording", DateFormat.getDateTimeInstance().format(row.get("recording_date", new Date())));

View File

@ -66,8 +66,6 @@ import net.yacy.crawler.retrieval.Response;
*/
public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolURL> {
public static final MultiProtocolURL POISON = new MultiProtocolURL(); // poison pill for concurrent link generators
private static final long serialVersionUID = -1173233022912141884L;
private static final long SMB_TIMEOUT = 5000;
@ -373,6 +371,11 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
escape();
}
/**
* Creates a MultiProtocolURL.
* If path contains '?', the search part is created automatically by splitting the input into path and search part;
* likewise for anchors ('#').
*/
public MultiProtocolURL(final String protocol, String host, final int port, final String path) throws MalformedURLException {
if (protocol == null) throw new MalformedURLException("protocol is null");
if (host.indexOf(':') >= 0 && host.charAt(0) != '[') host = '[' + host + ']'; // IPv6 host must be enclosed in square brackets
@ -521,9 +524,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
sbuf.append((char)ch); // leave it that way, it is used the right way
} else {
sbuf.append("&amp;"); // this must be urlencoded
sbuf.append("%26"); // this must be urlencoded
}
sbuf.append((char)ch);
} else if (ch == '#') { // RFC 1738 2.2 unsafe char is _not_ encoded because it may already be used for encoding
sbuf.append((char)ch);
} else if (ch == '!' || ch == ':' // unreserved

View File

@ -28,6 +28,7 @@ package net.yacy.data;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@ -42,6 +43,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
@ -222,7 +224,6 @@ public class WorkTables extends Tables {
final HTTPClient client = new HTTPClient(ClientIdentification.yacyInternetCrawlerAgent);
client.setTimout(120000);
Tables.Row row;
String url;
LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
for (final String pk: pks) {
row = null;
@ -234,15 +235,20 @@ public class WorkTables extends Tables {
ConcurrentLog.logException(e);
}
if (row == null) continue;
url = "http://" + host + ":" + port + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL));
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
ConcurrentLog.info("WorkTables", "executing url: " + url);
String theapicall = UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)) + "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + UTF8.String(row.getPK());
try {
client.GETbytes(url, username, pass, false);
l.put(url, client.getStatusCode());
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
MultiProtocolURL url = new MultiProtocolURL("http", host, port, theapicall);
ConcurrentLog.info("WorkTables", "executing url: " + url.toString());
try {
client.GETbytes(url, username, pass, false); // use the GETbytes(MultiProtocolURL,..) form to allow a url in a parameter (&url=path%...)
l.put(url.toString(), client.getStatusCode());
} catch (final IOException e) {
ConcurrentLog.logException(e);
l.put(url, -1);
l.put(url.toString(), -1);
}
} catch (MalformedURLException ex) {
ConcurrentLog.warn("APICALL", "wrong url in apicall " + theapicall);
}
}
return l;