From bd4cfeda3fd9addd24b5f81169189ce272130835 Mon Sep 17 00:00:00 2001 From: luccioman Date: Fri, 8 Jun 2018 10:33:23 +0200 Subject: [PATCH] Add a max acceptable limit to the size of Solr responses on p2p search Following activation of gzip compression on responses, to ensure uncompressed content can fit on available memory. --- .../solr/instance/RemoteInstance.java | 63 +++++++++++-- .../http/StrictSizeLimitEntityWrapper.java | 92 +++++++++++++++++++ .../StrictSizeLimitResponseInterceptor.java | 64 +++++++++++++ source/net/yacy/peers/Protocol.java | 28 ++++-- 4 files changed, 230 insertions(+), 17 deletions(-) create mode 100644 source/net/yacy/cora/protocol/http/StrictSizeLimitEntityWrapper.java create mode 100644 source/net/yacy/cora/protocol/http/StrictSizeLimitResponseInterceptor.java diff --git a/source/net/yacy/cora/federate/solr/instance/RemoteInstance.java b/source/net/yacy/cora/federate/solr/instance/RemoteInstance.java index 04de504e1..3f13866d4 100644 --- a/source/net/yacy/cora/federate/solr/instance/RemoteInstance.java +++ b/source/net/yacy/cora/federate/solr/instance/RemoteInstance.java @@ -66,6 +66,7 @@ import org.apache.solr.update.UpdateShardHandler.IdleConnectionsEvictor; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.cora.protocol.http.StrictSizeLimitResponseInterceptor; import net.yacy.cora.util.CommonPattern; import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.Memory; @@ -143,7 +144,7 @@ public class RemoteInstance implements SolrInstance { } return instances; } - + /** * @param url * the remote Solr URL. A default localhost URL is assumed when null. 
@@ -161,6 +162,30 @@ public class RemoteInstance implements SolrInstance { */ public RemoteInstance(final String url, final Collection coreNames, final String defaultCoreName, final int timeout, final boolean trustSelfSignedOnAuthenticatedServer) throws IOException { + this(url, coreNames, defaultCoreName, timeout, trustSelfSignedOnAuthenticatedServer, Long.MAX_VALUE); + } + + /** + * @param url + * the remote Solr URL. A default localhost URL is assumed when null. + * @param coreNames + * the Solr core names for the main collection and the webgraph + * @param defaultCoreName + * the core name of the main collection + * @param timeout + * the connection timeout in milliseconds + * @param trustSelfSignedOnAuthenticatedServer + * when true, self-signed certificates are accepted for an https + * connection to a remote server with authentication credentials + * @param maxBytesPerResponse + * maximum acceptable decompressed size in bytes for a response from + * the remote Solr server. Negative value or Long.MAX_VALUE means no + * limit. + * @throws IOException + * when a connection could not be opened to the remote Solr instance + */ + public RemoteInstance(final String url, final Collection coreNames, final String defaultCoreName, + final int timeout, final boolean trustSelfSignedOnAuthenticatedServer, final long maxBytesPerResponse) throws IOException { this.timeout = timeout; this.server= new HashMap(); this.solrurl = url == null ? 
"http://127.0.0.1:8983/solr/" : url; // that should work for the example configuration of solr 4.x.x @@ -212,10 +237,10 @@ public class RemoteInstance implements SolrInstance { } } if (solraccount.length() > 0) { - this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer); + this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse); } else if(u.isHTTPS()){ /* Here we must trust self-signed certificates as most peers with SSL enabled use such certificates */ - this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true); + this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true, maxBytesPerResponse); } else { /* Build a http client using the Solr utils as in the HttpSolrClient constructor implementation. * The main difference is that a shared connection manager is used (configured in the buildConnectionManager() function) */ @@ -224,9 +249,20 @@ public class RemoteInstance implements SolrInstance { /* Accept gzip compression of responses to reduce network usage */ params.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, true); this.client = HttpClientUtil.createClient(params, CONNECTION_MANAGER); - if(this.client instanceof DefaultHttpClient && this.client.getParams() != null) { - /* Set the maximum time to get a connection from the shared connections pool */ - HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout); + if(this.client instanceof DefaultHttpClient) { + if(this.client.getParams() != null) { + /* Set the maximum time to get a connection from the shared connections pool */ + HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout); + } + + if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) { + /* + * Add in last position the eventual interceptor limiting the response size, so + * that this is the decompressed amount of bytes 
that is considered + */ + ((DefaultHttpClient)this.client).addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse), + ((DefaultHttpClient)this.client).getResponseInterceptorCount()); + } } } @@ -298,10 +334,14 @@ public class RemoteInstance implements SolrInstance { * @param solraccount eventual user name used to authenticate on the target Solr * @param solraccount eventual password used to authenticate on the target Solr * @param trustSelfSignedCertificates when true, https connections to an host providing a self-signed certificate are accepted + * @param maxBytesPerResponse + * maximum acceptable decompressed size in bytes for a response from + * the remote Solr server. Negative value or Long.MAX_VALUE means no + * limit. * @return a new apache HttpClient instance usable as a custom http client by SolrJ */ private static HttpClient buildCustomHttpClient(final int timeout, final MultiProtocolURL u, final String solraccount, final String solrpw, - final String host, final boolean trustSelfSignedCertificates) { + final String host, final boolean trustSelfSignedCertificates, final long maxBytesPerResponse) { /* Important note : use of deprecated Apache classes is required because SolrJ still use them internally (see HttpClientUtil). 
* Upgrade only when Solr implementation will become compatible */ @@ -362,6 +402,15 @@ public class RemoteInstance implements SolrInstance { result.setCredentialsProvider(credsProvider); } + if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) { + /* + * Add in last position the eventual interceptor limiting the response size, so + * that this is the decompressed amount of bytes that is considered + */ + result.addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse), + result.getResponseInterceptorCount()); + } + return result; } diff --git a/source/net/yacy/cora/protocol/http/StrictSizeLimitEntityWrapper.java b/source/net/yacy/cora/protocol/http/StrictSizeLimitEntityWrapper.java new file mode 100644 index 000000000..b3ea39a12 --- /dev/null +++ b/source/net/yacy/cora/protocol/http/StrictSizeLimitEntityWrapper.java @@ -0,0 +1,92 @@ +// StrictSizeLimitEntityWrapper.java +// --------------------------- +// Copyright 2018 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.protocol.http; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.http.HttpEntity; +import org.apache.http.entity.HttpEntityWrapper; + +import net.yacy.cora.util.StrictLimitInputStream; + +/** + * HTTP entity wrapper used to strictly limit the size of the response content + * fetched from an http connection. + * + */ +public class StrictSizeLimitEntityWrapper extends HttpEntityWrapper { + + /** Reusable wrapped content stream */ + private InputStream content; + + /** Maximum amount of bytes to fetch from the http response body */ + private final long maxBytes; + + /** + * @param wrappedEntity + * the http entity to wrap. Must not be null. + * @param maxBytes + * the maximum amount of bytes to fetch from the http response body + * @throws IllegalArgumentException + * when wrappedEntity parameter is null or when maxBytes value is + * lower than zero. 
+ */ + public StrictSizeLimitEntityWrapper(final HttpEntity wrappedEntity, final long maxBytes) { + super(wrappedEntity); + if (wrappedEntity == null) { + throw new IllegalArgumentException("The wrappedEntity parameter must not be null."); + } + if (maxBytes < 0) { + throw new IllegalArgumentException("The maxBytes parameter must be greater or equal than zero."); + } + this.maxBytes = maxBytes; + } + + /** + * @return a wrapper on the wrapped entity content stream + * @throws IOException + * when an error occurred while accessing the wrapped stream + */ + private InputStream getWrappedStream() throws IOException { + final InputStream in = this.wrappedEntity.getContent(); + if (in == null) { + return in; + } + + return new StrictLimitInputStream(in, this.maxBytes); + } + + @Override + public InputStream getContent() throws IOException { + final InputStream result; + if (this.content == null) { + this.content = this.getWrappedStream(); + result = this.content; + } else { + result = this.content; + } + return result; + } + +} \ No newline at end of file diff --git a/source/net/yacy/cora/protocol/http/StrictSizeLimitResponseInterceptor.java b/source/net/yacy/cora/protocol/http/StrictSizeLimitResponseInterceptor.java new file mode 100644 index 000000000..bda0087c9 --- /dev/null +++ b/source/net/yacy/cora/protocol/http/StrictSizeLimitResponseInterceptor.java @@ -0,0 +1,64 @@ +// StrictSizeLimitResponseInterceptor.java +// --------------------------- +// Copyright 2018 by luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.protocol.http; + +import java.io.IOException; + +import org.apache.http.HttpEntity; +import org.apache.http.HttpException; +import org.apache.http.HttpResponse; +import org.apache.http.HttpResponseInterceptor; +import org.apache.http.protocol.HttpContext; + +/** + * An HTTP response interceptor strictly limiting the amount of bytes fetched + * from an HTTP response. + */ +public class StrictSizeLimitResponseInterceptor implements HttpResponseInterceptor { + + /** Maximum amount of bytes to fetch from the HTTP response body */ + private final long maxBytes; + + /** + * @param maxBytes + * the maximum amount of bytes to fetch from the HTTP response body + * @throws IllegalArgumentException + * when the maxBytes value is lower than zero + */ + public StrictSizeLimitResponseInterceptor(final long maxBytes) { + if (maxBytes < 0) { + throw new IllegalArgumentException("The maxBytes parameter must be greater or equals than zero"); + } + this.maxBytes = maxBytes; + } + + @Override + public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException { + final HttpEntity entity = response.getEntity(); + if (entity != null) { + response.setEntity(new StrictSizeLimitEntityWrapper(entity, this.maxBytes)); + } + + } + +} diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 3c55e15d8..56270dc1c 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -110,6 +110,7 @@ import 
net.yacy.kelondro.rwi.Reference; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainerCache; import net.yacy.kelondro.util.FileUtils; +import net.yacy.kelondro.util.MemoryControl; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.peers.graphics.WebStructureGraph.HostReference; @@ -1086,15 +1087,17 @@ public final class Protocol { * @param messageBegin beginning of the log message * @param ex exception to log */ - private void logError(String messageBegin, Exception ex) { - String message = ex.getMessage(); - if(message == null) { - message = "no details"; - } else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){ - /* Strip too large details to avoid polluting this log with complete remote stack traces */ - message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "..."; + private void logError(final String messageBegin, final Exception ex) { + if(log.isFine()) { + String message = ex.getMessage(); + if(message == null) { + message = "no details"; + } else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){ + /* Strip too large details to avoid polluting this log with complete remote stack traces */ + message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "..."; + } + log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message); } - log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message); } @Override @@ -1106,8 +1109,13 @@ public final class Protocol { SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED, SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED_DEFAULT); } - - this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer); // this is a 'patch configuration' which considers 'solr' as default collection + /* Add a limit to the maximum acceptable size of the remote peer Solr response. 
This can help prevent out of memory errors when : + * - this peer is overloaded + * - the remote peer has indexed documents with excessively large metadata (too large at least to fit within this peer resources) + * - the remote peer is a malicious one and would like to trigger a denial of service */ + final long maxBytesPerResponse = MemoryControl.available() / 4; + + this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse); // this is a 'patch configuration' which considers 'solr' as default collection try { boolean useBinaryResponseWriter = SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT; if (Switchboard.getSwitchboard() != null) {