Add a max acceptable limit to the size of Solr responses on p2p search

Following activation of gzip compression on responses, to ensure
uncompressed content can fit on available memory.
This commit is contained in:
luccioman 2018-06-08 10:33:23 +02:00
parent de4ea95687
commit bd4cfeda3f
4 changed files with 230 additions and 17 deletions

View File

@ -66,6 +66,7 @@ import org.apache.solr.update.UpdateShardHandler.IdleConnectionsEvictor;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.http.StrictSizeLimitResponseInterceptor;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory;
@ -143,7 +144,7 @@ public class RemoteInstance implements SolrInstance {
}
return instances;
}
/**
* @param url
* the remote Solr URL. A default localhost URL is assumed when null.
@ -161,6 +162,30 @@ public class RemoteInstance implements SolrInstance {
*/
public RemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName,
final int timeout, final boolean trustSelfSignedOnAuthenticatedServer) throws IOException {
/* Delegate to the constructor taking a maxBytesPerResponse parameter, passing Long.MAX_VALUE which that constructor documents as meaning "no limit" on the response size */
this(url, coreNames, defaultCoreName, timeout, trustSelfSignedOnAuthenticatedServer, Long.MAX_VALUE);
}
/**
* @param url
* the remote Solr URL. A default localhost URL is assumed when null.
* @param coreNames
* the Solr core names for the main collection and the webgraph
* @param defaultCoreName
* the core name of the main collection
* @param timeout
* the connection timeout in milliseconds
* @param trustSelfSignedOnAuthenticatedServer
* when true, self-signed certificates are accepted for an https
* connection to a remote server with authentication credentials
* @param maxBytesPerResponse
* maximum acceptable decompressed size in bytes for a response from
* the remote Solr server. Negative value or Long.MAX_VALUE means no
* limit.
* @throws IOException
* when a connection could not be opened to the remote Solr instance
*/
public RemoteInstance(final String url, final Collection<String> coreNames, final String defaultCoreName,
final int timeout, final boolean trustSelfSignedOnAuthenticatedServer, final long maxBytesPerResponse) throws IOException {
this.timeout = timeout;
this.server= new HashMap<String, ConcurrentUpdateSolrClient>();
this.solrurl = url == null ? "http://127.0.0.1:8983/solr/" : url; // that should work for the example configuration of solr 4.x.x
@ -212,10 +237,10 @@ public class RemoteInstance implements SolrInstance {
}
}
if (solraccount.length() > 0) {
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer);
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse);
} else if(u.isHTTPS()){
/* Here we must trust self-signed certificates as most peers with SSL enabled use such certificates */
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true);
this.client = buildCustomHttpClient(timeout, u, solraccount, solrpw, host, true, maxBytesPerResponse);
} else {
/* Build a http client using the Solr utils as in the HttpSolrClient constructor implementation.
* The main difference is that a shared connection manager is used (configured in the buildConnectionManager() function) */
@ -224,9 +249,20 @@ public class RemoteInstance implements SolrInstance {
/* Accept gzip compression of responses to reduce network usage */
params.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, true);
this.client = HttpClientUtil.createClient(params, CONNECTION_MANAGER);
if(this.client instanceof DefaultHttpClient && this.client.getParams() != null) {
/* Set the maximum time to get a connection from the shared connections pool */
HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout);
if(this.client instanceof DefaultHttpClient) {
if(this.client.getParams() != null) {
/* Set the maximum time to get a connection from the shared connections pool */
HttpClientParams.setConnectionManagerTimeout(this.client.getParams(), timeout);
}
if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) {
/*
* Add in last position the eventual interceptor limiting the response size, so
* that this is the decompressed amount of bytes that is considered
*/
((DefaultHttpClient)this.client).addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse),
((DefaultHttpClient)this.client).getResponseInterceptorCount());
}
}
}
@ -298,10 +334,14 @@ public class RemoteInstance implements SolrInstance {
* @param solraccount eventual user name used to authenticate on the target Solr
* @param solrpw eventual password used to authenticate on the target Solr
* @param trustSelfSignedCertificates when true, https connections to an host providing a self-signed certificate are accepted
* @param maxBytesPerResponse
* maximum acceptable decompressed size in bytes for a response from
* the remote Solr server. Negative value or Long.MAX_VALUE means no
* limit.
* @return a new apache HttpClient instance usable as a custom http client by SolrJ
*/
private static HttpClient buildCustomHttpClient(final int timeout, final MultiProtocolURL u, final String solraccount, final String solrpw,
final String host, final boolean trustSelfSignedCertificates) {
final String host, final boolean trustSelfSignedCertificates, final long maxBytesPerResponse) {
/* Important note : use of deprecated Apache classes is required because SolrJ still use them internally (see HttpClientUtil).
* Upgrade only when Solr implementation will become compatible */
@ -362,6 +402,15 @@ public class RemoteInstance implements SolrInstance {
result.setCredentialsProvider(credsProvider);
}
if (maxBytesPerResponse >= 0 && maxBytesPerResponse < Long.MAX_VALUE) {
/*
* Add in last position the eventual interceptor limiting the response size, so
* that this is the decompressed amount of bytes that is considered
*/
result.addResponseInterceptor(new StrictSizeLimitResponseInterceptor(maxBytesPerResponse),
result.getResponseInterceptorCount());
}
return result;
}

View File

@ -0,0 +1,92 @@
// StrictSizeLimitEntityWrapper.java
// ---------------------------
// Copyright 2018 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.protocol.http;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.http.HttpEntity;
import org.apache.http.entity.HttpEntityWrapper;
import net.yacy.cora.util.StrictLimitInputStream;
/**
 * HTTP entity wrapper used to strictly limit the size of the response content
 * fetched from an http connection.
 * <p>
 * The content stream of the wrapped entity is itself wrapped in a
 * {@link StrictLimitInputStream} configured with the maximum amount of bytes.
 * Both {@link #getContent()} and {@link #writeTo(OutputStream)} go through that
 * limited stream, so the limit can not be bypassed by consumers that write the
 * entity to an output stream instead of reading its content stream.
 * </p>
 */
public class StrictSizeLimitEntityWrapper extends HttpEntityWrapper {

    /** Size in bytes of the buffer used when copying the content to an output stream */
    private static final int COPY_BUFFER_SIZE = 4096;

    /** Reusable wrapped content stream, lazily created on first access */
    private InputStream content;

    /** Maximum amount of bytes to fetch from the http response body */
    private final long maxBytes;

    /**
     * @param wrappedEntity
     *            the http entity to wrap. Must not be null.
     * @param maxBytes
     *            the maximum amount of bytes to fetch from the http response body
     * @throws IllegalArgumentException
     *             when wrappedEntity parameter is null or when maxBytes value is
     *             lower than zero.
     */
    public StrictSizeLimitEntityWrapper(final HttpEntity wrappedEntity, final long maxBytes) {
        super(wrappedEntity);
        if (wrappedEntity == null) {
            throw new IllegalArgumentException("The wrappedEntity parameter must not be null.");
        }
        if (maxBytes < 0) {
            throw new IllegalArgumentException("The maxBytes parameter must be greater or equal than zero.");
        }
        this.maxBytes = maxBytes;
    }

    /**
     * @return a size limiting wrapper on the wrapped entity content stream, or
     *         null when the wrapped entity provides no content stream
     * @throws IOException
     *             when an error occurred while accessing the wrapped stream
     */
    private InputStream getWrappedStream() throws IOException {
        final InputStream in = this.wrappedEntity.getContent();
        if (in == null) {
            return null;
        }
        return new StrictLimitInputStream(in, this.maxBytes);
    }

    /**
     * @return the size limited content stream. The same instance is returned on
     *         each call once it has been successfully created.
     * @throws IOException
     *             when an error occurred while accessing the wrapped stream
     */
    @Override
    public InputStream getContent() throws IOException {
        if (this.content == null) {
            this.content = this.getWrappedStream();
        }
        return this.content;
    }

    /**
     * Write the entity content to the given output stream, reading it from the
     * size limited stream returned by {@link #getContent()}. Without this
     * override, the inherited implementation would let the wrapped entity write
     * its content directly, ignoring the size limit.
     *
     * @param outstream
     *            the stream to write the content to. Must not be null.
     * @throws IllegalArgumentException
     *             when the outstream parameter is null
     * @throws IOException
     *             when an error occurred while reading the content or writing it
     *             to the output stream
     */
    @Override
    public void writeTo(final OutputStream outstream) throws IOException {
        if (outstream == null) {
            throw new IllegalArgumentException("The outstream parameter must not be null.");
        }
        final InputStream in = this.getContent();
        if (in == null) {
            /* No content stream available : nothing to write */
            return;
        }
        try {
            final byte[] buffer = new byte[COPY_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                outstream.write(buffer, 0, read);
            }
        } finally {
            /* Always release the content stream, even when reading or writing failed */
            in.close();
        }
    }
}

View File

@ -0,0 +1,64 @@
// StrictSizeLimitResponseInterceptor.java
// ---------------------------
// Copyright 2018 by luccioman; https://github.com/luccioman
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.protocol.http;
import java.io.IOException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.protocol.HttpContext;
/**
 * An HTTP response interceptor strictly limiting the amount of bytes fetched
 * from an HTTP response : when a processed response holds an entity, that
 * entity is replaced by a {@link StrictSizeLimitEntityWrapper} configured with
 * the maximum amount of bytes given at construction time.
 */
public class StrictSizeLimitResponseInterceptor implements HttpResponseInterceptor {

    /** Upper bound, in bytes, applied to the body of each processed HTTP response */
    private final long maxBytes;

    /**
     * @param maxBytes
     *            the maximum amount of bytes to fetch from the HTTP response body
     * @throws IllegalArgumentException
     *             when the maxBytes value is lower than zero
     */
    public StrictSizeLimitResponseInterceptor(final long maxBytes) {
        if (maxBytes < 0) {
            throw new IllegalArgumentException("The maxBytes parameter must be greater or equals than zero");
        }
        this.maxBytes = maxBytes;
    }

    /**
     * Wrap the entity of the response, when there is one, so that its content
     * is size limited. A response without entity is left untouched.
     *
     * @param response
     *            the HTTP response to process
     * @param context
     *            the HTTP context of the request (not used here)
     */
    @Override
    public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
        final HttpEntity body = response.getEntity();
        if (body == null) {
            /* No body : nothing to limit */
            return;
        }
        response.setEntity(new StrictSizeLimitEntityWrapper(body, this.maxBytes));
    }
}

View File

@ -110,6 +110,7 @@ import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceContainerCache;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
@ -1086,15 +1087,17 @@ public final class Protocol {
* @param messageBegin beginning of the log message
* @param ex exception to log
*/
private void logError(String messageBegin, Exception ex) {
String message = ex.getMessage();
if(message == null) {
message = "no details";
} else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){
/* Strip too large details to avoid polluting this log with complete remote stack traces */
message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "...";
private void logError(final String messageBegin, final Exception ex) {
if(log.isFine()) {
String message = ex.getMessage();
if(message == null) {
message = "no details";
} else if(message.length() > MAX_ERROR_MESSAGE_LENGTH){
/* Strip too large details to avoid polluting this log with complete remote stack traces */
message = message.substring(0, MAX_ERROR_MESSAGE_LENGTH) + "...";
}
log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message);
}
log.fine(messageBegin + " at " + this.targetBaseURL + " : " + message);
}
@Override
@ -1106,8 +1109,13 @@ public final class Protocol {
SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED,
SwitchboardConstants.FEDERATED_SERVICE_SOLR_INDEXING_AUTHENTICATED_ALLOW_SELF_SIGNED_DEFAULT);
}
this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer); // this is a 'patch configuration' which considers 'solr' as default collection
/* Add a limit to the maximum acceptable size of the remote peer Solr response. This can help prevent out of memory errors when :
* - this peer is overloaded
* - the remote peer has indexed documents with excessively large metadata (too large at least to fit within this peer resources)
* - the remote peer is a malicious one and would like to trigger a denial of service */
final long maxBytesPerResponse = MemoryControl.available() / 4;
this.instance = new RemoteInstance(this.targetBaseURL, null, "solr", this.timeout, trustSelfSignedOnAuthenticatedServer, maxBytesPerResponse); // this is a 'patch configuration' which considers 'solr' as default collection
try {
boolean useBinaryResponseWriter = SwitchboardConstants.REMOTE_SOLR_BINARY_RESPONSE_ENABLED_DEFAULT;
if (Switchboard.getSwitchboard() != null) {