mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Avoid double-loading of the same resource from the web in case a second attempt to load the resource is started while the first attempt is still loading the content from the web. This delays the second attempt until the first attempt has finished, with the possible result that the second attempt reads only from the web cache, not from the web.
This will also enhance the process of image result display from SVN 7105 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7114 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
fb828f3767
commit
ffaa9a1c51
|
@ -33,10 +33,13 @@ import java.io.Writer;
|
|||
import java.net.MalformedURLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
|
@ -73,6 +76,7 @@ public final class LoaderDispatcher {
|
|||
private final FTPLoader ftpLoader;
|
||||
private final SMBLoader smbLoader;
|
||||
private final FileLoader fileLoader;
|
||||
private final HashMap<String, Semaphore> loaderSteering; // a map that delivers a 'finish' semaphore for urls
|
||||
private final Log log;
|
||||
|
||||
public LoaderDispatcher(final Switchboard sb) {
|
||||
|
@ -81,10 +85,11 @@ public final class LoaderDispatcher {
|
|||
|
||||
// initiate loader objects
|
||||
this.log = new Log("LOADER");
|
||||
httpLoader = new HTTPLoader(sb, log);
|
||||
ftpLoader = new FTPLoader(sb, log);
|
||||
smbLoader = new SMBLoader(sb, log);
|
||||
fileLoader = new FileLoader(sb, log);
|
||||
this.httpLoader = new HTTPLoader(sb, log);
|
||||
this.ftpLoader = new FTPLoader(sb, log);
|
||||
this.smbLoader = new SMBLoader(sb, log);
|
||||
this.fileLoader = new FileLoader(sb, log);
|
||||
this.loaderSteering = new HashMap<String, Semaphore>();
|
||||
}
|
||||
|
||||
public boolean isSupportedProtocol(final String protocol) {
|
||||
|
@ -141,14 +146,38 @@ public final class LoaderDispatcher {
|
|||
tmp.renameTo(targetFile);
|
||||
}
|
||||
|
||||
public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
|
||||
String url = request.url().toNormalform(true, false);
|
||||
Semaphore check = this.loaderSteering.get(url);
|
||||
if (check != null) {
|
||||
// a loading process may be going on for that url
|
||||
try { check.tryAcquire(5, TimeUnit.SECONDS);} catch (InterruptedException e) {}
|
||||
// now the process may have terminated and we run a normal loading
|
||||
// which may be successful faster because of a cache hit
|
||||
}
|
||||
|
||||
try {
|
||||
this.loaderSteering.put(url, new Semaphore(0));
|
||||
Response response = loadInternal(request, cacheStrategy, maxFileSize);
|
||||
check = this.loaderSteering.remove(url);
|
||||
if (check != null) check.release(1000);
|
||||
return response;
|
||||
} catch (Exception e) {
|
||||
// release the semaphore anyway
|
||||
check = this.loaderSteering.remove(url);
|
||||
if (check != null) check.release(1000);
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* load a resource from the web, from ftp, from smb or a file
|
||||
* @param request the request essentials
|
||||
* @param cacheStratgy strategy according to CACHE_STRATEGY_NOCACHE,CACHE_STRATEGY_IFFRESH,CACHE_STRATEGY_IFEXIST,CACHE_STRATEGY_CACHEONLY
|
||||
* @param cacheStrategy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY
|
||||
* @return the loaded entity in a Response object
|
||||
* @throws IOException
|
||||
*/
|
||||
public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
|
||||
public Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
|
||||
// get the protocol of the next URL
|
||||
final DigestURI url = request.url();
|
||||
final String protocol = url.getProtocol();
|
||||
|
|
Loading…
Reference in New Issue
Block a user