mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Process large or local file images dealing directly with content
InputStream.
This commit is contained in:
parent
3c4c77099d
commit
f01d49c37a
|
@ -28,10 +28,13 @@ import java.awt.Image;
|
||||||
import java.awt.MediaTracker;
|
import java.awt.MediaTracker;
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
import java.awt.image.Raster;
|
import java.awt.image.Raster;
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import javax.imageio.stream.ImageInputStream;
|
||||||
|
|
||||||
import net.yacy.cora.document.id.DigestURL;
|
import net.yacy.cora.document.id.DigestURL;
|
||||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||||
import net.yacy.cora.federate.yacy.CacheStrategy;
|
import net.yacy.cora.federate.yacy.CacheStrategy;
|
||||||
|
@ -42,11 +45,11 @@ import net.yacy.cora.protocol.RequestHeader;
|
||||||
import net.yacy.cora.storage.ConcurrentARC;
|
import net.yacy.cora.storage.ConcurrentARC;
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
import net.yacy.data.URLLicense;
|
import net.yacy.data.URLLicense;
|
||||||
import net.yacy.document.ImageParser;
|
|
||||||
import net.yacy.kelondro.util.MemoryControl;
|
import net.yacy.kelondro.util.MemoryControl;
|
||||||
import net.yacy.kelondro.workflow.WorkflowProcessor;
|
import net.yacy.kelondro.workflow.WorkflowProcessor;
|
||||||
import net.yacy.peers.graphics.EncodedImage;
|
import net.yacy.peers.graphics.EncodedImage;
|
||||||
import net.yacy.repository.Blacklist.BlacklistType;
|
import net.yacy.repository.Blacklist.BlacklistType;
|
||||||
|
import net.yacy.repository.LoaderDispatcher;
|
||||||
import net.yacy.search.Switchboard;
|
import net.yacy.search.Switchboard;
|
||||||
import net.yacy.server.serverObjects;
|
import net.yacy.server.serverObjects;
|
||||||
import net.yacy.server.serverSwitch;
|
import net.yacy.server.serverSwitch;
|
||||||
|
@ -74,8 +77,8 @@ public class ViewImage {
|
||||||
* when specified url is malformed, or a read/write error
|
* when specified url is malformed, or a read/write error
|
||||||
* occured, or input or target image format is not supported.
|
* occured, or input or target image format is not supported.
|
||||||
* Sould end in a HTTP 500 error whose processing is more
|
* Sould end in a HTTP 500 error whose processing is more
|
||||||
* consistent across browsers than a response with zero
|
* consistent across browsers than a response with zero content
|
||||||
* content bytes.
|
* bytes.
|
||||||
*/
|
*/
|
||||||
public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env)
|
public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -113,39 +116,81 @@ public class ViewImage {
|
||||||
if (image != null) {
|
if (image != null) {
|
||||||
encodedImage = new EncodedImage(image, ext, post.getBoolean("isStatic"));
|
encodedImage = new EncodedImage(image, ext, post.getBoolean("isStatic"));
|
||||||
} else {
|
} else {
|
||||||
byte[] resourceb = null;
|
|
||||||
if (url != null)
|
|
||||||
try {
|
|
||||||
String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName
|
|
||||||
: ClientIdentification.yacyInternetCrawlerAgentName);
|
|
||||||
ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName);
|
|
||||||
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST,
|
|
||||||
BlacklistType.SEARCH, agent);
|
|
||||||
} catch (final IOException e) {
|
|
||||||
ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
boolean okToCache = true;
|
|
||||||
if (resourceb == null) {
|
|
||||||
/*
|
|
||||||
* Throw an exception, wich will end in a HTTP 500 response,
|
|
||||||
* better handled by browsers than an empty image
|
|
||||||
*/
|
|
||||||
throw new IOException("Image could not be loaded.");
|
|
||||||
}
|
|
||||||
|
|
||||||
String urlExt = MultiProtocolURL.getFileExtension(url.getFileName());
|
String urlExt = MultiProtocolURL.getFileExtension(url.getFileName());
|
||||||
if (ext != null && ext.equalsIgnoreCase(urlExt) && isBrowserRendered(urlExt)) {
|
if (ext != null && ext.equalsIgnoreCase(urlExt) && isBrowserRendered(urlExt)) {
|
||||||
return new ByteArrayInputStream(resourceb);
|
return openInputStream(post, sb.loader, auth, url);
|
||||||
}
|
}
|
||||||
|
|
||||||
// read image
|
ImageInputStream imageInStream = null;
|
||||||
encodedImage = parseAndScale(post, auth, urlString, ext, okToCache, resourceb);
|
InputStream inStream = null;
|
||||||
|
/*
|
||||||
|
* When opening a file, the most efficient is to open
|
||||||
|
* ImageInputStream directly on file
|
||||||
|
*/
|
||||||
|
if (url.isFile()) {
|
||||||
|
imageInStream = ImageIO.createImageInputStream(url.getFSFile());
|
||||||
|
} else {
|
||||||
|
inStream = openInputStream(post, sb.loader, auth, url);
|
||||||
|
imageInStream = ImageIO.createImageInputStream(inStream);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
// read image
|
||||||
|
encodedImage = parseAndScale(post, auth, urlString, ext, imageInStream);
|
||||||
|
} finally {
|
||||||
|
/*
|
||||||
|
* imageInStream.close() method doesn't close source input
|
||||||
|
* stream
|
||||||
|
*/
|
||||||
|
if (inStream != null) {
|
||||||
|
try {
|
||||||
|
inStream.close();
|
||||||
|
} catch (IOException ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return encodedImage;
|
return encodedImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open input stream on image url using provided loader. All parameters must
|
||||||
|
* not be null.
|
||||||
|
*
|
||||||
|
* @param post
|
||||||
|
* post parameters.
|
||||||
|
* @param loader.
|
||||||
|
* Resources loader.
|
||||||
|
* @param auth
|
||||||
|
* true when user has credentials to load full images.
|
||||||
|
* @param url
|
||||||
|
* image url.
|
||||||
|
* @return an open input stream instance (don't forget to close it).
|
||||||
|
* @throws IOException
|
||||||
|
* when a read/write error occured.
|
||||||
|
*/
|
||||||
|
private static InputStream openInputStream(final serverObjects post, final LoaderDispatcher loader,
|
||||||
|
final boolean auth, DigestURL url) throws IOException {
|
||||||
|
InputStream inStream = null;
|
||||||
|
if (url != null) {
|
||||||
|
try {
|
||||||
|
String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName
|
||||||
|
: ClientIdentification.yacyInternetCrawlerAgentName);
|
||||||
|
ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName);
|
||||||
|
inStream = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST,
|
||||||
|
BlacklistType.SEARCH, agent);
|
||||||
|
} catch (final IOException e) {
|
||||||
|
ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (inStream == null) {
|
||||||
|
throw new IOException("Input stream could no be open");
|
||||||
|
}
|
||||||
|
return inStream;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param formatName
|
* @param formatName
|
||||||
* informal file format name. For example : "png".
|
* informal file format name. For example : "png".
|
||||||
|
@ -165,31 +210,35 @@ public class ViewImage {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process resourceb byte array to try to produce an EncodedImage instance
|
* Process source image to try to produce an EncodedImage instance
|
||||||
* eventually scaled and cropped depending on post parameters.
|
* eventually scaled and clipped depending on post parameters. When
|
||||||
|
* processed, imageInStream is closed.
|
||||||
*
|
*
|
||||||
* @param post
|
* @param post
|
||||||
* request post parameters. Must not be null.
|
* request post parameters. Must not be null.
|
||||||
* @param auth
|
* @param auth
|
||||||
* true when access rigths are OK.
|
* true when access rigths are OK.
|
||||||
* @param urlString
|
* @param urlString
|
||||||
* image source URL. Must not be null.
|
* image source URL as String. Must not be null.
|
||||||
* @param ext
|
* @param ext
|
||||||
* image file extension. May be null.
|
* target image file format. May be null.
|
||||||
* @param okToCache
|
* @param imageInStream
|
||||||
* true when image can be cached
|
* open stream on image content. Must not be null.
|
||||||
* @param resourceb
|
|
||||||
* byte array. Must not be null.
|
|
||||||
* @return an EncodedImage instance.
|
* @return an EncodedImage instance.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* when image could not be parsed or encoded to specified format
|
* when image could not be parsed or encoded to specified format
|
||||||
*/
|
*/
|
||||||
protected static EncodedImage parseAndScale(serverObjects post, boolean auth, String urlString, String ext,
|
protected static EncodedImage parseAndScale(serverObjects post, boolean auth, String urlString, String ext,
|
||||||
boolean okToCache, byte[] resourceb) throws IOException {
|
ImageInputStream imageInStream) throws IOException {
|
||||||
EncodedImage encodedImage = null;
|
EncodedImage encodedImage = null;
|
||||||
|
|
||||||
Image image = ImageParser.parse(urlString, resourceb);
|
Image image = ImageIO.read(imageInStream);
|
||||||
if (image == null) {
|
if (image == null) {
|
||||||
|
try {
|
||||||
|
/* When a null image is returned, we have to close the stream */
|
||||||
|
imageInStream.close();
|
||||||
|
} catch (IOException ignoredException) {
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Throw an exception, wich will end in a HTTP 500 response, better
|
* Throw an exception, wich will end in a HTTP 500 response, better
|
||||||
* handled by browsers than an empty image
|
* handled by browsers than an empty image
|
||||||
|
@ -197,53 +246,52 @@ public class ViewImage {
|
||||||
throw new IOException("Image format is not supported.");
|
throw new IOException("Image format is not supported.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (image != null) {
|
int maxwidth = post.getInt("maxwidth", 0);
|
||||||
int maxwidth = post.getInt("maxwidth", 0);
|
int maxheight = post.getInt("maxheight", 0);
|
||||||
int maxheight = post.getInt("maxheight", 0);
|
final boolean quadratic = post.containsKey("quadratic");
|
||||||
final boolean quadratic = post.containsKey("quadratic");
|
boolean isStatic = post.getBoolean("isStatic");
|
||||||
boolean isStatic = post.getBoolean("isStatic");
|
if (!auth || maxwidth != 0 || maxheight != 0) {
|
||||||
if (!auth || maxwidth != 0 || maxheight != 0) {
|
|
||||||
|
|
||||||
// find original size
|
// find original size
|
||||||
int h = image.getHeight(null);
|
final int originWidth = image.getWidth(null);
|
||||||
int w = image.getWidth(null);
|
final int originHeigth = image.getHeight(null);
|
||||||
|
|
||||||
// in case of not-authorized access shrink the image to
|
// in case of not-authorized access shrink the image to
|
||||||
// prevent
|
// prevent
|
||||||
// copyright problems, so that images are not larger than
|
// copyright problems, so that images are not larger than
|
||||||
// thumbnails
|
// thumbnails
|
||||||
Dimension maxDimensions = calculateMaxDimensions(auth, w, h, maxwidth, maxheight);
|
Dimension maxDimensions = calculateMaxDimensions(auth, originWidth, originHeigth, maxwidth, maxheight);
|
||||||
|
|
||||||
// if a quadratic flag is set, we cut the image out to be in
|
// if a quadratic flag is set, we cut the image out to be in
|
||||||
// quadratic shape
|
// quadratic shape
|
||||||
if (quadratic && w != h) {
|
int w = originWidth;
|
||||||
image = makeSquare(image, h, w);
|
int h = originHeigth;
|
||||||
h = image.getHeight(null);
|
if (quadratic && originWidth != originHeigth) {
|
||||||
w = image.getWidth(null);
|
image = makeSquare(image, originHeigth, originWidth);
|
||||||
}
|
h = image.getHeight(null);
|
||||||
|
w = image.getWidth(null);
|
||||||
Dimension finalDimensions = calculateDimensions(w, h, maxDimensions);
|
|
||||||
|
|
||||||
if (w != finalDimensions.width && h != finalDimensions.height) {
|
|
||||||
image = scale(finalDimensions.width, finalDimensions.height, image);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((finalDimensions.width == 16) && (finalDimensions.height == 16) && okToCache) {
|
|
||||||
// this might be a favicon, store image to cache for
|
|
||||||
// faster
|
|
||||||
// re-load later on
|
|
||||||
iconcache.put(urlString, image);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
* An error can still occur when transcoding from buffered image to
|
Dimension finalDimensions = calculateDimensions(w, h, maxDimensions);
|
||||||
* target ext : in that case return null
|
|
||||||
*/
|
if (w != finalDimensions.width && h != finalDimensions.height) {
|
||||||
encodedImage = new EncodedImage(image, ext, isStatic);
|
image = scale(finalDimensions.width, finalDimensions.height, image);
|
||||||
if (encodedImage.getImage().length() == 0) {
|
|
||||||
throw new IOException("Image could not be encoded to format : " + ext);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (finalDimensions.width == 16 && finalDimensions.height == 16) {
|
||||||
|
// this might be a favicon, store image to cache for
|
||||||
|
// faster
|
||||||
|
// re-load later on
|
||||||
|
iconcache.put(urlString, image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* An error can still occur when transcoding from buffered image to
|
||||||
|
* target ext : in that case return null
|
||||||
|
*/
|
||||||
|
encodedImage = new EncodedImage(image, ext, isStatic);
|
||||||
|
if (encodedImage.getImage().length() == 0) {
|
||||||
|
throw new IOException("Image could not be encoded to format : " + ext);
|
||||||
}
|
}
|
||||||
return encodedImage;
|
return encodedImage;
|
||||||
}
|
}
|
||||||
|
|
125
source/net/yacy/cora/util/HTTPInputStream.java
Executable file
125
source/net/yacy/cora/util/HTTPInputStream.java
Executable file
|
@ -0,0 +1,125 @@
|
||||||
|
/**
|
||||||
|
* HTTPInputStream
|
||||||
|
* Copyright 2014 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||||
|
* First published 26.11.2014 on http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import net.yacy.cora.protocol.http.HTTPClient;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A HTTP InputStream delegating to HTTPClient. Use it when streaming HTTP content to easily finish HTTP client when closing stream.
|
||||||
|
* @author luc
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class HTTPInputStream extends InputStream {
|
||||||
|
|
||||||
|
/** HTTP client */
|
||||||
|
private HTTPClient httpClient;
|
||||||
|
|
||||||
|
/** Encapsulated HTTP content stream */
|
||||||
|
private InputStream contentStream;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs from a httpClient.
|
||||||
|
* @param httpClient a httpClient with accessible stream content.
|
||||||
|
* @throws IOException when content stream can not be open on httpClient
|
||||||
|
*/
|
||||||
|
public HTTPInputStream(HTTPClient httpClient) throws IOException {
|
||||||
|
if(httpClient == null) {
|
||||||
|
throw new IllegalArgumentException("httpClient is null");
|
||||||
|
}
|
||||||
|
this.httpClient = httpClient;
|
||||||
|
this.contentStream = httpClient.getContentstream();
|
||||||
|
if(this.contentStream == null) {
|
||||||
|
throw new IOException("content stream is null");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close properly HTTP connection with httpClient
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
httpClient.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read() throws IOException {
|
||||||
|
return contentStream.read();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return contentStream.hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(byte[] b) throws IOException {
|
||||||
|
return contentStream.read(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return contentStream.equals(obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(byte[] b, int off, int len) throws IOException {
|
||||||
|
return contentStream.read(b, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long skip(long n) throws IOException {
|
||||||
|
return contentStream.skip(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return contentStream.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int available() throws IOException {
|
||||||
|
return contentStream.available();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void mark(int readlimit) {
|
||||||
|
contentStream.mark(readlimit);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void reset() throws IOException {
|
||||||
|
contentStream.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean markSupported() {
|
||||||
|
return contentStream.markSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -24,7 +24,9 @@
|
||||||
|
|
||||||
package net.yacy.crawler.retrieval;
|
package net.yacy.crawler.retrieval;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
import net.yacy.cora.document.id.DigestURL;
|
import net.yacy.cora.document.id.DigestURL;
|
||||||
import net.yacy.cora.federate.solr.FailCategory;
|
import net.yacy.cora.federate.solr.FailCategory;
|
||||||
|
@ -34,7 +36,9 @@ import net.yacy.cora.protocol.RequestHeader;
|
||||||
import net.yacy.cora.protocol.ResponseHeader;
|
import net.yacy.cora.protocol.ResponseHeader;
|
||||||
import net.yacy.cora.protocol.http.HTTPClient;
|
import net.yacy.cora.protocol.http.HTTPClient;
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
|
import net.yacy.cora.util.HTTPInputStream;
|
||||||
import net.yacy.crawler.CrawlSwitchboard;
|
import net.yacy.crawler.CrawlSwitchboard;
|
||||||
|
import net.yacy.crawler.data.Cache;
|
||||||
import net.yacy.crawler.data.CrawlProfile;
|
import net.yacy.crawler.data.CrawlProfile;
|
||||||
import net.yacy.crawler.data.Latency;
|
import net.yacy.crawler.data.Latency;
|
||||||
import net.yacy.kelondro.io.ByteCount;
|
import net.yacy.kelondro.io.ByteCount;
|
||||||
|
@ -75,6 +79,208 @@ public final class HTTPLoader {
|
||||||
Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
|
Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start);
|
||||||
return doc;
|
return doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open input stream on a requested HTTP resource. When resource is small, fully load it and returns a ByteArrayInputStream instance.
|
||||||
|
* @param request
|
||||||
|
* @param profile crawl profile
|
||||||
|
* @param retryCount remaining redirect retries count
|
||||||
|
* @param maxFileSize max file size to load. -1 means no limit.
|
||||||
|
* @param blacklistType blacklist type to use
|
||||||
|
* @param agent agent identifier
|
||||||
|
* @return an open input stream. Don't forget to close it.
|
||||||
|
* @throws IOException when an error occured
|
||||||
|
*/
|
||||||
|
public InputStream openInputStream(final Request request, CrawlProfile profile, final int retryCount,
|
||||||
|
final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent)
|
||||||
|
throws IOException {
|
||||||
|
if (retryCount < 0) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
|
||||||
|
throw new IOException(
|
||||||
|
"retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
|
||||||
|
}
|
||||||
|
DigestURL url = request.url();
|
||||||
|
|
||||||
|
final String host = url.getHost();
|
||||||
|
if (host == null || host.length() < 2) {
|
||||||
|
throw new IOException("host is not well-formed: '" + host + "'");
|
||||||
|
}
|
||||||
|
final String path = url.getFile();
|
||||||
|
int port = url.getPort();
|
||||||
|
final boolean ssl = url.getProtocol().equals("https");
|
||||||
|
if (port < 0)
|
||||||
|
port = (ssl) ? 443 : 80;
|
||||||
|
|
||||||
|
// check if url is in blacklist
|
||||||
|
final String hostlow = host.toLowerCase();
|
||||||
|
if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT,
|
||||||
|
"url in blacklist", -1);
|
||||||
|
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolve yacy and yacyh domains
|
||||||
|
final AlternativeDomainNames yacyResolver = this.sb.peers;
|
||||||
|
if (yacyResolver != null) {
|
||||||
|
final String yAddress = yacyResolver.resolve(host);
|
||||||
|
if (yAddress != null) {
|
||||||
|
url = new DigestURL(url.getProtocol() + "://" + yAddress + path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// create a request header
|
||||||
|
final RequestHeader requestHeader = createRequestheader(request, agent);
|
||||||
|
|
||||||
|
// HTTP-Client
|
||||||
|
final HTTPClient client = new HTTPClient(agent);
|
||||||
|
client.setRedirecting(false); // we want to handle redirection
|
||||||
|
// ourselves, so we don't index pages
|
||||||
|
// twice
|
||||||
|
client.setTimout(this.socketTimeout);
|
||||||
|
client.setHeader(requestHeader.entrySet());
|
||||||
|
|
||||||
|
// send request
|
||||||
|
client.GET(url, false);
|
||||||
|
final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
|
||||||
|
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
|
||||||
|
String requestURLString = request.url().toNormalform(true);
|
||||||
|
|
||||||
|
// check redirection
|
||||||
|
if (statusCode > 299 && statusCode < 310) {
|
||||||
|
|
||||||
|
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode,
|
||||||
|
responseHeader, requestURLString);
|
||||||
|
|
||||||
|
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
|
||||||
|
// we have two use cases here: loading from a crawl or just
|
||||||
|
// loading the url. Check this:
|
||||||
|
if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) {
|
||||||
|
// put redirect url on the crawler queue to repeat a
|
||||||
|
// double-check
|
||||||
|
request.redirectURL(redirectionUrl);
|
||||||
|
this.sb.crawlStacker.stackCrawl(request);
|
||||||
|
// in the end we must throw an exception (even if this is
|
||||||
|
// not an error, just to abort the current process
|
||||||
|
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to "
|
||||||
|
+ redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
|
||||||
|
}
|
||||||
|
|
||||||
|
// if we are already doing a shutdown we don't need to retry
|
||||||
|
// crawling
|
||||||
|
if (Thread.currentThread().isInterrupted()) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode);
|
||||||
|
throw new IOException(
|
||||||
|
"CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
|
||||||
|
}
|
||||||
|
|
||||||
|
// retry crawling with new url
|
||||||
|
request.redirectURL(redirectionUrl);
|
||||||
|
return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
|
||||||
|
}
|
||||||
|
// we don't want to follow redirects
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
|
||||||
|
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine()
|
||||||
|
+ "' for URL '" + requestURLString + "'$");
|
||||||
|
} else if (statusCode == 200 || statusCode == 203) {
|
||||||
|
// the transfer is ok
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When content is not large (less than 1MB), we have better cache it if cache is enabled and url is not local
|
||||||
|
*/
|
||||||
|
long contentLength = client.getHttpResponse().getEntity().getContentLength();
|
||||||
|
if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (1024 * 1024) && !url.isLocal()) {
|
||||||
|
byte[] content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
|
||||||
|
|
||||||
|
try {
|
||||||
|
Cache.store(url, responseHeader, content);
|
||||||
|
} catch (final IOException e) {
|
||||||
|
this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ByteArrayInputStream(content);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Returns a HTTPInputStream delegating to
|
||||||
|
* client.getContentstream(). Close method will ensure client is
|
||||||
|
* properly closed.
|
||||||
|
*/
|
||||||
|
return new HTTPInputStream(client);
|
||||||
|
} else {
|
||||||
|
// if the response has not the right response type then reject file
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
|
||||||
|
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine()
|
||||||
|
+ "' for URL '" + requestURLString + "'$");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract redirect URL from response header. Status code is supposed to be between 299 and 310. Parameters must not be null.
|
||||||
|
* @return redirect URL
|
||||||
|
* @throws IOException when an error occured
|
||||||
|
*/
|
||||||
|
private DigestURL extractRedirectURL(final Request request, CrawlProfile profile, DigestURL url,
|
||||||
|
final HTTPClient client, final int statusCode, final ResponseHeader responseHeader, String requestURLString)
|
||||||
|
throws IOException {
|
||||||
|
// read redirection URL
|
||||||
|
String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION);
|
||||||
|
redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
|
||||||
|
|
||||||
|
if (redirectionUrlString.isEmpty()) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.TEMPORARY_NETWORK_FAILURE,
|
||||||
|
"no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
|
||||||
|
throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine()
|
||||||
|
+ "' for URL '" + requestURLString + "'$");
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalize URL
|
||||||
|
final DigestURL redirectionUrl = DigestURL.newURL(request.url(), redirectionUrlString);
|
||||||
|
|
||||||
|
// restart crawling with new url
|
||||||
|
this.log.info("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL "
|
||||||
|
+ requestURLString);
|
||||||
|
this.log.info("CRAWLER ..Redirecting request to: " + redirectionUrl.toNormalform(false));
|
||||||
|
|
||||||
|
this.sb.webStructure.generateCitationReference(url, redirectionUrl);
|
||||||
|
|
||||||
|
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
|
||||||
|
FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
|
||||||
|
}
|
||||||
|
return redirectionUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create request header for loading content.
|
||||||
|
* @param request search request
|
||||||
|
* @param agent agent identification information
|
||||||
|
* @return a request header
|
||||||
|
* @throws IOException when an error occured
|
||||||
|
*/
|
||||||
|
private RequestHeader createRequestheader(final Request request, final ClientIdentification.Agent agent)
|
||||||
|
throws IOException {
|
||||||
|
final RequestHeader requestHeader = new RequestHeader();
|
||||||
|
requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent);
|
||||||
|
DigestURL refererURL = null;
|
||||||
|
if (request.referrerhash() != null) {
|
||||||
|
refererURL = this.sb.getURL(request.referrerhash());
|
||||||
|
}
|
||||||
|
if (refererURL != null) {
|
||||||
|
requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
|
||||||
|
}
|
||||||
|
requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
|
||||||
|
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE,
|
||||||
|
this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
|
||||||
|
requestHeader.put(HeaderFramework.ACCEPT_CHARSET,
|
||||||
|
this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));
|
||||||
|
requestHeader.put(HeaderFramework.ACCEPT_ENCODING,
|
||||||
|
this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
|
||||||
|
return requestHeader;
|
||||||
|
}
|
||||||
|
|
||||||
private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
|
private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
|
||||||
|
|
||||||
|
@ -112,15 +318,7 @@ public final class HTTPLoader {
|
||||||
Response response = null;
|
Response response = null;
|
||||||
|
|
||||||
// create a request header
|
// create a request header
|
||||||
final RequestHeader requestHeader = new RequestHeader();
|
final RequestHeader requestHeader = createRequestheader(request, agent);
|
||||||
requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent);
|
|
||||||
DigestURL refererURL = null;
|
|
||||||
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
|
|
||||||
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
|
|
||||||
requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT));
|
|
||||||
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE));
|
|
||||||
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET));
|
|
||||||
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
|
|
||||||
|
|
||||||
// HTTP-Client
|
// HTTP-Client
|
||||||
final HTTPClient client = new HTTPClient(agent);
|
final HTTPClient client = new HTTPClient(agent);
|
||||||
|
@ -137,27 +335,8 @@ public final class HTTPLoader {
|
||||||
// check redirection
|
// check redirection
|
||||||
if (statusCode > 299 && statusCode < 310) {
|
if (statusCode > 299 && statusCode < 310) {
|
||||||
|
|
||||||
// read redirection URL
|
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode,
|
||||||
String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION);
|
responseHeader, requestURLString);
|
||||||
redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim();
|
|
||||||
|
|
||||||
if (redirectionUrlString.isEmpty()) {
|
|
||||||
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
|
|
||||||
throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$");
|
|
||||||
}
|
|
||||||
|
|
||||||
// normalize URL
|
|
||||||
final DigestURL redirectionUrl = DigestURL.newURL(request.url(), redirectionUrlString);
|
|
||||||
|
|
||||||
// restart crawling with new url
|
|
||||||
this.log.info("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + requestURLString);
|
|
||||||
this.log.info("CRAWLER ..Redirecting request to: " + redirectionUrl.toNormalform(false));
|
|
||||||
|
|
||||||
this.sb.webStructure.generateCitationReference(url, redirectionUrl);
|
|
||||||
|
|
||||||
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
|
|
||||||
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
|
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
|
||||||
// we have two use cases here: loading from a crawl or just loading the url. Check this:
|
// we have two use cases here: loading from a crawl or just loading the url. Check this:
|
||||||
|
|
|
@ -26,8 +26,10 @@
|
||||||
|
|
||||||
package net.yacy.repository;
|
package net.yacy.repository;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
@ -209,54 +211,9 @@ public final class LoaderDispatcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we have the page in the cache
|
// check if we have the page in the cache
|
||||||
if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) {
|
Response response = loadFromCache(request, cacheStrategy, agent, url, crawlProfile);
|
||||||
// we have passed a first test if caching is allowed
|
if(response != null) {
|
||||||
// now see if there is a cache entry
|
return response;
|
||||||
|
|
||||||
final ResponseHeader cachedResponse = (url.isLocal()) ? null : Cache.getResponseHeader(url.hash());
|
|
||||||
if (cachedResponse != null && Cache.hasContent(url.hash())) {
|
|
||||||
// yes we have the content
|
|
||||||
|
|
||||||
// create request header values and a response object because we need that
|
|
||||||
// in case that we want to return the cached content in the next step
|
|
||||||
final RequestHeader requestHeader = new RequestHeader();
|
|
||||||
requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent);
|
|
||||||
DigestURL refererURL = null;
|
|
||||||
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
|
|
||||||
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
|
|
||||||
final Response response = new Response(
|
|
||||||
request,
|
|
||||||
requestHeader,
|
|
||||||
cachedResponse,
|
|
||||||
crawlProfile,
|
|
||||||
true,
|
|
||||||
null);
|
|
||||||
|
|
||||||
// check which caching strategy shall be used
|
|
||||||
if (cacheStrategy == CacheStrategy.IFEXIST || cacheStrategy == CacheStrategy.CACHEONLY) {
|
|
||||||
// well, just take the cache and don't care about freshness of the content
|
|
||||||
final byte[] content = Cache.getContent(url.hash());
|
|
||||||
if (content != null) {
|
|
||||||
LoaderDispatcher.log.info("cache hit/useall for: " + url.toNormalform(true));
|
|
||||||
response.setContent(content);
|
|
||||||
return response;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test
|
|
||||||
//assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
|
|
||||||
if (response.isFreshForProxy()) {
|
|
||||||
final byte[] content = Cache.getContent(url.hash());
|
|
||||||
if (content != null) {
|
|
||||||
LoaderDispatcher.log.info("cache hit/fresh for: " + url.toNormalform(true));
|
|
||||||
response.setContent(content);
|
|
||||||
return response;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LoaderDispatcher.log.info("cache hit/stale for: " + url.toNormalform(true));
|
|
||||||
} else if (cachedResponse != null) {
|
|
||||||
LoaderDispatcher.log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// check case where we want results from the cache exclusively, and never from the Internet (offline mode)
|
// check case where we want results from the cache exclusively, and never from the Internet (offline mode)
|
||||||
|
@ -269,21 +226,7 @@ public final class LoaderDispatcher {
|
||||||
|
|
||||||
// check access time: this is a double-check (we checked possibly already in the balancer)
|
// check access time: this is a double-check (we checked possibly already in the balancer)
|
||||||
// to make sure that we don't DoS the target by mistake
|
// to make sure that we don't DoS the target by mistake
|
||||||
if (!url.isLocal()) {
|
checkAccessTime(agent, url);
|
||||||
final Long lastAccess = accessTime.get(host);
|
|
||||||
long wait = 0;
|
|
||||||
if (lastAccess != null) wait = Math.max(0, agent.minimumDelta + lastAccess.longValue() - System.currentTimeMillis());
|
|
||||||
if (wait > 0) {
|
|
||||||
// force a sleep here. Instead just sleep we clean up the accessTime map
|
|
||||||
final long untilTime = System.currentTimeMillis() + wait;
|
|
||||||
cleanupAccessTimeTable(untilTime);
|
|
||||||
if (System.currentTimeMillis() < untilTime) {
|
|
||||||
long frcdslp = untilTime - System.currentTimeMillis();
|
|
||||||
LoaderDispatcher.log.info("Forcing sleep of " + frcdslp + " ms for host " + host);
|
|
||||||
try {Thread.sleep(frcdslp);} catch (final InterruptedException ee) {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now it's for sure that we will access the target. Remember the access time
|
// now it's for sure that we will access the target. Remember the access time
|
||||||
if (host != null) {
|
if (host != null) {
|
||||||
|
@ -292,7 +235,6 @@ public final class LoaderDispatcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
// load resource from the internet
|
// load resource from the internet
|
||||||
Response response = null;
|
|
||||||
if (protocol.equals("http") || protocol.equals("https")) {
|
if (protocol.equals("http") || protocol.equals("https")) {
|
||||||
response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, agent);
|
response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, agent);
|
||||||
} else if (protocol.equals("ftp")) {
|
} else if (protocol.equals("ftp")) {
|
||||||
|
@ -331,6 +273,167 @@ public final class LoaderDispatcher {
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Try loading requested resource from cache according to cache strategy
|
||||||
|
* @param request request to resource
|
||||||
|
* @param cacheStrategy cache strategy to use
|
||||||
|
* @param agent agent identifier
|
||||||
|
* @param url resource url
|
||||||
|
* @param crawlProfile crawl profile
|
||||||
|
* @return a Response instance when resource could be loaded from cache, or null.
|
||||||
|
* @throws IOException when an error occurred
|
||||||
|
*/
|
||||||
|
private Response loadFromCache(final Request request, CacheStrategy cacheStrategy, ClientIdentification.Agent agent,
|
||||||
|
final DigestURL url, final CrawlProfile crawlProfile) throws IOException {
|
||||||
|
Response response = null;
|
||||||
|
if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) {
|
||||||
|
// we have passed a first test if caching is allowed
|
||||||
|
// now see if there is a cache entry
|
||||||
|
|
||||||
|
final ResponseHeader cachedResponse = (url.isLocal()) ? null : Cache.getResponseHeader(url.hash());
|
||||||
|
if (cachedResponse != null && Cache.hasContent(url.hash())) {
|
||||||
|
// yes we have the content
|
||||||
|
|
||||||
|
// create request header values and a response object because we need that
|
||||||
|
// in case that we want to return the cached content in the next step
|
||||||
|
final RequestHeader requestHeader = new RequestHeader();
|
||||||
|
requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent);
|
||||||
|
DigestURL refererURL = null;
|
||||||
|
if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash());
|
||||||
|
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true));
|
||||||
|
response = new Response(
|
||||||
|
request,
|
||||||
|
requestHeader,
|
||||||
|
cachedResponse,
|
||||||
|
crawlProfile,
|
||||||
|
true,
|
||||||
|
null);
|
||||||
|
|
||||||
|
// check which caching strategy shall be used
|
||||||
|
if (cacheStrategy == CacheStrategy.IFEXIST || cacheStrategy == CacheStrategy.CACHEONLY) {
|
||||||
|
// well, just take the cache and don't care about freshness of the content
|
||||||
|
final byte[] content = Cache.getContent(url.hash());
|
||||||
|
if (content != null) {
|
||||||
|
LoaderDispatcher.log.info("cache hit/useall for: " + url.toNormalform(true));
|
||||||
|
response.setContent(content);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test
|
||||||
|
//assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
|
||||||
|
if (response.isFreshForProxy()) {
|
||||||
|
final byte[] content = Cache.getContent(url.hash());
|
||||||
|
if (content != null) {
|
||||||
|
LoaderDispatcher.log.info("cache hit/fresh for: " + url.toNormalform(true));
|
||||||
|
response.setContent(content);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LoaderDispatcher.log.info("cache hit/stale for: " + url.toNormalform(true));
|
||||||
|
} else if (cachedResponse != null) {
|
||||||
|
LoaderDispatcher.log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open an InputStream on a resource from the web, from ftp, from smb or a file
|
||||||
|
* @param request the request essentials
|
||||||
|
* @param cacheStrategy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY
|
||||||
|
* @return an open InputStream. Don't forget to close it once used!
|
||||||
|
* @throws IOException when url is malformed, blacklisted, or CacheStrategy is CACHEONLY and content is unavailable
|
||||||
|
*/
|
||||||
|
private InputStream openInputStreamInternal(final Request request, CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException {
|
||||||
|
// get the protocol of the next URL
|
||||||
|
final DigestURL url = request.url();
|
||||||
|
if (url.isFile() || url.isSMB()) {
|
||||||
|
cacheStrategy = CacheStrategy.NOCACHE; // load just from the file
|
||||||
|
// system
|
||||||
|
}
|
||||||
|
final String protocol = url.getProtocol();
|
||||||
|
final String host = url.getHost();
|
||||||
|
final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle()));
|
||||||
|
|
||||||
|
// check if url is in blacklist
|
||||||
|
if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
|
||||||
|
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
|
||||||
|
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$");
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if we have the page in the cache
|
||||||
|
Response cachedResponse = loadFromCache(request, cacheStrategy, agent, url, crawlProfile);
|
||||||
|
if(cachedResponse != null) {
|
||||||
|
return new ByteArrayInputStream(cachedResponse.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
// check case where we want results from the cache exclusively, and never from the Internet (offline mode)
|
||||||
|
if (cacheStrategy == CacheStrategy.CACHEONLY) {
|
||||||
|
// we had a chance to get the content from the cache .. it's over. We don't have it.
|
||||||
|
throw new IOException("cache only strategy");
|
||||||
|
}
|
||||||
|
|
||||||
|
// now forget about the cache, nothing there. Try to load the content from the Internet
|
||||||
|
|
||||||
|
// check access time: this is a double-check (we checked possibly already in the balancer)
|
||||||
|
// to make sure that we don't DoS the target by mistake
|
||||||
|
checkAccessTime(agent, url);
|
||||||
|
|
||||||
|
// now it's for sure that we will access the target. Remember the access time
|
||||||
|
if (host != null) {
|
||||||
|
if (accessTime.size() > accessTimeMaxsize) accessTime.clear(); // prevent a memory leak here
|
||||||
|
accessTime.put(host, System.currentTimeMillis());
|
||||||
|
}
|
||||||
|
|
||||||
|
// load resource from the internet
|
||||||
|
InputStream inStream = null;
|
||||||
|
if (protocol.equals("http") || protocol.equals("https")) {
|
||||||
|
inStream = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent);
|
||||||
|
} else if (protocol.equals("ftp") || protocol.equals("smb") || protocol.equals("file")) {
|
||||||
|
// may also open directly stream with ftp loader
|
||||||
|
inStream = url.getInputStream(agent, null, null);
|
||||||
|
} else {
|
||||||
|
throw new IOException("Unsupported protocol '" + protocol + "' in url " + url);
|
||||||
|
}
|
||||||
|
if (inStream == null) {
|
||||||
|
throw new IOException("Unable to open content stream");
|
||||||
|
}
|
||||||
|
|
||||||
|
return inStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check access time: this is a double-check (we checked possibly already in the balancer)
|
||||||
|
* to make sure that we don't DoS the target by mistake
|
||||||
|
* @param agent agent identifier
|
||||||
|
* @param url target url
|
||||||
|
*/
|
||||||
|
private void checkAccessTime(ClientIdentification.Agent agent, final DigestURL url) {
|
||||||
|
if (!url.isLocal()) {
|
||||||
|
String host = url.getHost();
|
||||||
|
final Long lastAccess = accessTime.get(host);
|
||||||
|
long wait = 0;
|
||||||
|
if (lastAccess != null)
|
||||||
|
wait = Math.max(0, agent.minimumDelta + lastAccess.longValue() - System.currentTimeMillis());
|
||||||
|
if (wait > 0) {
|
||||||
|
// force a sleep here. Instead of just sleeping, we clean up the
|
||||||
|
// accessTime map
|
||||||
|
final long untilTime = System.currentTimeMillis() + wait;
|
||||||
|
cleanupAccessTimeTable(untilTime);
|
||||||
|
if (System.currentTimeMillis() < untilTime) {
|
||||||
|
long frcdslp = untilTime - System.currentTimeMillis();
|
||||||
|
LoaderDispatcher.log.info("Forcing sleep of " + frcdslp + " ms for host " + host);
|
||||||
|
try {
|
||||||
|
Thread.sleep(frcdslp);
|
||||||
|
} catch (final InterruptedException ee) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private int protocolMaxFileSize(final DigestURL url) {
|
private int protocolMaxFileSize(final DigestURL url) {
|
||||||
if (url.isHTTP() || url.isHTTPS())
|
if (url.isHTTP() || url.isHTTPS())
|
||||||
return this.sb.getConfigInt("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
|
return this.sb.getConfigInt("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
|
||||||
|
@ -357,6 +460,53 @@ public final class LoaderDispatcher {
|
||||||
// read resource body (if it is there)
|
// read resource body (if it is there)
|
||||||
return entry.getContent();
|
return entry.getContent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Open url as InputStream from the web or the cache
|
||||||
|
* @param request must be not null
|
||||||
|
* @param cacheStrategy cache strategy to use
|
||||||
|
* @param blacklistType black list
|
||||||
|
* @param agent agent identification for HTTP requests
|
||||||
|
* @return an open InputStream on content. Don't forget to close it once used.
|
||||||
|
* @throws IOException when url is malformed or blacklisted
|
||||||
|
*/
|
||||||
|
public InputStream openInputStream(final Request request, final CacheStrategy cacheStrategy,
|
||||||
|
BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException {
|
||||||
|
final int maxFileSize = protocolMaxFileSize(request.url());
|
||||||
|
InputStream stream = null;
|
||||||
|
|
||||||
|
Semaphore check = this.loaderSteering.get(request.url());
|
||||||
|
if (check != null && cacheStrategy != CacheStrategy.NOCACHE) {
|
||||||
|
// a loading process is going on for that url
|
||||||
|
long t = System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
check.tryAcquire(5, TimeUnit.SECONDS);
|
||||||
|
} catch (final InterruptedException e) {
|
||||||
|
}
|
||||||
|
ConcurrentLog.info("LoaderDispatcher",
|
||||||
|
"waited " + (System.currentTimeMillis() - t) + " ms for " + request.url().toNormalform(true));
|
||||||
|
// now the process may have terminated and we run a normal loading
|
||||||
|
// which may be successful faster because of a cache hit
|
||||||
|
}
|
||||||
|
|
||||||
|
this.loaderSteering.put(request.url(), new Semaphore(0));
|
||||||
|
try {
|
||||||
|
stream = openInputStreamInternal(request, cacheStrategy, maxFileSize, blacklistType, agent);
|
||||||
|
} catch(IOException ioe) {
|
||||||
|
/* Do not re-encapsulate a possible IOException in another IOException */
|
||||||
|
throw ioe;
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
} finally {
|
||||||
|
// release the semaphore anyway
|
||||||
|
check = this.loaderSteering.remove(request.url());
|
||||||
|
if (check != null) {
|
||||||
|
check.release(1000); // don't block any other
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stream;
|
||||||
|
}
|
||||||
|
|
||||||
public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException, Parser.Failure {
|
public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException, Parser.Failure {
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import javax.imageio.stream.ImageInputStream;
|
||||||
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
import net.yacy.peers.graphics.EncodedImage;
|
import net.yacy.peers.graphics.EncodedImage;
|
||||||
import net.yacy.server.serverObjects;
|
import net.yacy.server.serverObjects;
|
||||||
|
@ -75,8 +78,9 @@ public class ViewImagePerfTest extends ViewImageTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Process inFile image, update processedFiles list and failures map, and append measurements to results_perfs.txt. All
|
* Process inFile image, update processedFiles list and failures map, and
|
||||||
* parameters must not be null.
|
* append measurements to results_perfs.txt. All parameters must not be
|
||||||
|
* null.
|
||||||
*
|
*
|
||||||
* @param ext
|
* @param ext
|
||||||
* output encoding image format
|
* output encoding image format
|
||||||
|
@ -92,7 +96,7 @@ public class ViewImagePerfTest extends ViewImageTest {
|
||||||
* when a read/write error occurred
|
* when a read/write error occurred
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected void processFile(String ext, File outDir, serverObjects post, Map<String, Exception> failures,
|
protected void processFile(String ext, File outDir, serverObjects post, Map<String, Throwable> failures,
|
||||||
File inFile) throws IOException {
|
File inFile) throws IOException {
|
||||||
/* Delete eventual previous result file */
|
/* Delete eventual previous result file */
|
||||||
System.out
|
System.out
|
||||||
|
@ -102,43 +106,43 @@ public class ViewImagePerfTest extends ViewImageTest {
|
||||||
outFile.delete();
|
outFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] resourceb = getBytes(inFile);
|
|
||||||
String urlString = inFile.getAbsolutePath();
|
String urlString = inFile.getAbsolutePath();
|
||||||
EncodedImage img = null;
|
EncodedImage img = null;
|
||||||
Exception error = null;
|
Exception error = null;
|
||||||
|
long beginTime = System.nanoTime(), time, minTime = Long.MAX_VALUE, maxTime = 0, meanTime = 0, totalTime = 0;
|
||||||
|
int step = 0;
|
||||||
|
for (step = 0; (totalTime / 1000000000) < this.minMeasureTime; step++) {
|
||||||
|
beginTime = System.nanoTime();
|
||||||
|
ImageInputStream inStream = ImageIO.createImageInputStream(inFile);
|
||||||
|
try {
|
||||||
|
img = ViewImage.parseAndScale(post, true, urlString, ext, inStream);
|
||||||
|
} catch (Exception e) {
|
||||||
|
error = e;
|
||||||
|
}
|
||||||
|
time = System.nanoTime() - beginTime;
|
||||||
|
minTime = Math.min(minTime, time);
|
||||||
|
maxTime = Math.max(maxTime, time);
|
||||||
|
totalTime += time;
|
||||||
|
}
|
||||||
|
if (step > 0) {
|
||||||
|
meanTime = totalTime / step;
|
||||||
|
} else {
|
||||||
|
meanTime = totalTime;
|
||||||
|
}
|
||||||
|
PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results_perfs.txt"), true));
|
||||||
try {
|
try {
|
||||||
long beginTime, time, minTime = Long.MAX_VALUE, maxTime = 0, meanTime = 0, totalTime = 0;
|
writeMessage("Measured ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext,
|
||||||
int step = 0;
|
resultsWriter);
|
||||||
for (step = 0; (totalTime / 1000000000) < this.minMeasureTime; step++) {
|
if(img == null) {
|
||||||
beginTime = System.nanoTime();
|
writeMessage("Image could not be rendered! Measurement show time needed to read and parse image data until error detection.", resultsWriter);
|
||||||
img = ViewImage.parseAndScale(post, true, urlString, ext, false, resourceb);
|
|
||||||
time = System.nanoTime() - beginTime;
|
|
||||||
if (img == null) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
minTime = Math.min(minTime, time);
|
|
||||||
maxTime = Math.max(maxTime, time);
|
|
||||||
totalTime += time;
|
|
||||||
}
|
}
|
||||||
if (img == null) {
|
writeMessage("Render total time (ms) : " + (totalTime) / 1000000 + " on " + step + " steps.",
|
||||||
System.out.println("Image could not be rendered!");
|
resultsWriter);
|
||||||
} else {
|
writeMessage("Render mean time (ms) : " + (meanTime) / 1000000, resultsWriter);
|
||||||
meanTime = totalTime / step;
|
writeMessage("Render min time (ms) : " + (minTime) / 1000000, resultsWriter);
|
||||||
PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results_perfs.txt"), true));
|
writeMessage("Render max time (ms) : " + (maxTime) / 1000000, resultsWriter);
|
||||||
try {
|
} finally {
|
||||||
writeMessage("Measured ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : "
|
resultsWriter.close();
|
||||||
+ ext, resultsWriter);
|
|
||||||
writeMessage("Render total time (ms) : " + (totalTime) / 1000000 + " on " + step + " steps.",
|
|
||||||
resultsWriter);
|
|
||||||
writeMessage("Render mean time (ms) : " + (meanTime) / 1000000, resultsWriter);
|
|
||||||
writeMessage("Render min time (ms) : " + (minTime) / 1000000, resultsWriter);
|
|
||||||
writeMessage("Render max time (ms) : " + (maxTime) / 1000000, resultsWriter);
|
|
||||||
} finally {
|
|
||||||
resultsWriter.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
error = e;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (img == null) {
|
if (img == null) {
|
||||||
|
@ -218,7 +222,7 @@ public class ViewImagePerfTest extends ViewImageTest {
|
||||||
System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath());
|
System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath());
|
||||||
|
|
||||||
List<File> processedFiles = new ArrayList<File>();
|
List<File> processedFiles = new ArrayList<File>();
|
||||||
Map<String, Exception> failures = new TreeMap<>();
|
Map<String, Throwable> failures = new TreeMap<>();
|
||||||
try {
|
try {
|
||||||
long time = System.nanoTime();
|
long time = System.nanoTime();
|
||||||
test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures);
|
test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures);
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.FileWriter;
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -12,6 +10,9 @@ import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
import javax.imageio.ImageIO;
|
||||||
|
import javax.imageio.stream.ImageInputStream;
|
||||||
|
|
||||||
import net.yacy.cora.util.ConcurrentLog;
|
import net.yacy.cora.util.ConcurrentLog;
|
||||||
import net.yacy.peers.graphics.EncodedImage;
|
import net.yacy.peers.graphics.EncodedImage;
|
||||||
import net.yacy.server.serverObjects;
|
import net.yacy.server.serverObjects;
|
||||||
|
@ -52,24 +53,6 @@ public class ViewImageTest {
|
||||||
/** Default output encoding format */
|
/** Default output encoding format */
|
||||||
private static final String DEFAULT_OUT_EXT = "png";
|
private static final String DEFAULT_OUT_EXT = "png";
|
||||||
|
|
||||||
/**
|
|
||||||
* @param testFile
|
|
||||||
* file to load
|
|
||||||
* @return testFile content as a bytes array
|
|
||||||
* @throws IOException
|
|
||||||
* when an error occured while loading
|
|
||||||
*/
|
|
||||||
protected byte[] getBytes(File testFile) throws IOException {
|
|
||||||
InputStream inStream = new FileInputStream(testFile);
|
|
||||||
byte[] res = new byte[inStream.available()];
|
|
||||||
try {
|
|
||||||
inStream.read(res);
|
|
||||||
} finally {
|
|
||||||
inStream.close();
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param args
|
* @param args
|
||||||
* main parameters. first item may contain input file or folder
|
* main parameters. first item may contain input file or folder
|
||||||
|
@ -207,7 +190,7 @@ public class ViewImageTest {
|
||||||
* @param processedFiles
|
* @param processedFiles
|
||||||
* all processed image files
|
* all processed image files
|
||||||
* @param failures
|
* @param failures
|
||||||
* map input file url which failed with eventual cause exception
|
* map input file url which failed with eventual cause error
|
||||||
* @param time
|
* @param time
|
||||||
* total processing time in nanoseconds
|
* total processing time in nanoseconds
|
||||||
* @param outDir
|
* @param outDir
|
||||||
|
@ -215,7 +198,7 @@ public class ViewImageTest {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* when a write error occurred while writing the results file
|
* when a write error occurred while writing the results file
|
||||||
*/
|
*/
|
||||||
protected void displayResults(List<File> processedFiles, Map<String, Exception> failures, long time, File outDir)
|
protected void displayResults(List<File> processedFiles, Map<String, Throwable> failures, long time, File outDir)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results.txt")));
|
PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results.txt")));
|
||||||
try {
|
try {
|
||||||
|
@ -226,7 +209,7 @@ public class ViewImageTest {
|
||||||
} else {
|
} else {
|
||||||
writeMessage("Some input files could not be processed :", resultsWriter);
|
writeMessage("Some input files could not be processed :", resultsWriter);
|
||||||
}
|
}
|
||||||
for (Entry<String, Exception> entry : failures.entrySet()) {
|
for (Entry<String, Throwable> entry : failures.entrySet()) {
|
||||||
writeMessage(entry.getKey(), resultsWriter);
|
writeMessage(entry.getKey(), resultsWriter);
|
||||||
if (entry.getValue() != null) {
|
if (entry.getValue() != null) {
|
||||||
writeMessage("cause : " + entry.getValue(), resultsWriter);
|
writeMessage("cause : " + entry.getValue(), resultsWriter);
|
||||||
|
@ -266,7 +249,7 @@ public class ViewImageTest {
|
||||||
* when a read/write error occurred
|
* when a read/write error occurred
|
||||||
*/
|
*/
|
||||||
protected void processFiles(String ext, boolean recursive, File outDir, serverObjects post, File[] inFiles,
|
protected void processFiles(String ext, boolean recursive, File outDir, serverObjects post, File[] inFiles,
|
||||||
List<File> processedFiles, Map<String, Exception> failures) throws IOException {
|
List<File> processedFiles, Map<String, Throwable> failures) throws IOException {
|
||||||
for (File inFile : inFiles) {
|
for (File inFile : inFiles) {
|
||||||
if (inFile.isDirectory()) {
|
if (inFile.isDirectory()) {
|
||||||
if (recursive) {
|
if (recursive) {
|
||||||
|
@ -291,7 +274,7 @@ public class ViewImageTest {
|
||||||
* @param inFile file image to process
|
* @param inFile file image to process
|
||||||
* @throws IOException when a read/write error occurred
|
* @throws IOException when a read/write error occurred
|
||||||
*/
|
*/
|
||||||
protected void processFile(String ext, File outDir, serverObjects post, Map<String, Exception> failures, File inFile)
|
protected void processFile(String ext, File outDir, serverObjects post, Map<String, Throwable> failures, File inFile)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
/* Delete eventual previous result file */
|
/* Delete eventual previous result file */
|
||||||
File outFile = new File(outDir, inFile.getName() + "." + ext);
|
File outFile = new File(outDir, inFile.getName() + "." + ext);
|
||||||
|
@ -299,13 +282,13 @@ public class ViewImageTest {
|
||||||
outFile.delete();
|
outFile.delete();
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] resourceb = getBytes(inFile);
|
ImageInputStream inStream = ImageIO.createImageInputStream(inFile);
|
||||||
String urlString = inFile.getAbsolutePath();
|
String urlString = inFile.getAbsolutePath();
|
||||||
EncodedImage img = null;
|
EncodedImage img = null;
|
||||||
Exception error = null;
|
Throwable error = null;
|
||||||
try {
|
try {
|
||||||
img = ViewImage.parseAndScale(post, true, urlString, ext, false, resourceb);
|
img = ViewImage.parseAndScale(post, true, urlString, ext, inStream);
|
||||||
} catch (Exception e) {
|
} catch (Throwable e) {
|
||||||
error = e;
|
error = e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -383,7 +366,7 @@ public class ViewImageTest {
|
||||||
System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath());
|
System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath());
|
||||||
|
|
||||||
List<File> processedFiles = new ArrayList<File>();
|
List<File> processedFiles = new ArrayList<File>();
|
||||||
Map<String, Exception> failures = new TreeMap<>();
|
Map<String, Throwable> failures = new TreeMap<>();
|
||||||
try {
|
try {
|
||||||
long time = System.nanoTime();
|
long time = System.nanoTime();
|
||||||
test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures);
|
test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user