Prevent loading of content from the cache when retrieval with IFFRESH is used and the cache is stale.

This should speed up snippet generation when the cache strategy is IFFRESH.
This commit is contained in:
Michael Peter Christen 2012-07-06 08:29:41 +02:00
parent 91f14ea38e
commit c3db015410
2 changed files with 24 additions and 9 deletions

View File

@ -297,6 +297,16 @@ public final class Cache {
} }
} }
/**
 * Tests whether the content store holds an entry under the given key.
 * This only asks {@code fileDB} whether the key is present; it does not
 * return the stored content (callers use {@code getContent} for that).
 *
 * @param hash the key to look up (callers pass a URL hash)
 * @return {@code true} if {@code fileDB} reports the key as present;
 *         {@code false} if it is absent or if the lookup ran out of memory
 */
public static boolean hasContent(final byte[] hash) {
    boolean present = false;
    try {
        present = fileDB.containsKey(hash);
    } catch (final OutOfMemoryError oom) {
        // best effort: report the failure and treat the entry as missing
        Log.logException(oom);
    }
    return present;
}
/** /**
* removed response header and cached content from the database * removed response header and cached content from the database
* @param url * @param url

View File

@ -201,8 +201,7 @@ public final class LoaderDispatcher {
// now see if there is a cache entry // now see if there is a cache entry
final ResponseHeader cachedResponse = (url.isLocal()) ? null : Cache.getResponseHeader(url.hash()); final ResponseHeader cachedResponse = (url.isLocal()) ? null : Cache.getResponseHeader(url.hash());
final byte[] content = (cachedResponse == null) ? null : Cache.getContent(url.hash()); if (cachedResponse != null && Cache.hasContent(url.hash())) {
if (cachedResponse != null && content != null) {
// yes we have the content // yes we have the content
// create request header values and a response object because we need that // create request header values and a response object because we need that
@ -218,26 +217,32 @@ public final class LoaderDispatcher {
cachedResponse, cachedResponse,
crawlProfile, crawlProfile,
true, true,
content); null);
// check which caching strategy shall be used // check which caching strategy shall be used
if (cacheStrategy == CacheStrategy.IFEXIST || cacheStrategy == CacheStrategy.CACHEONLY) { if (cacheStrategy == CacheStrategy.IFEXIST || cacheStrategy == CacheStrategy.CACHEONLY) {
// well, just take the cache and don't care about freshness of the content // well, just take the cache and don't care about freshness of the content
this.log.logInfo("cache hit/useall for: " + url.toNormalform(true, false)); final byte[] content = Cache.getContent(url.hash());
return response; if (content != null) {
this.log.logInfo("cache hit/useall for: " + url.toNormalform(true, false));
response.setContent(content);
return response;
}
} }
// now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test // now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test
assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy; assert cacheStrategy == CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
if (response.isFreshForProxy()) { if (response.isFreshForProxy()) {
this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true, false)); final byte[] content = Cache.getContent(url.hash());
return response; if (content != null) {
this.log.logInfo("cache hit/fresh for: " + url.toNormalform(true, false));
response.setContent(content);
return response;
}
} }
this.log.logInfo("cache hit/stale for: " + url.toNormalform(true, false)); this.log.logInfo("cache hit/stale for: " + url.toNormalform(true, false));
} else if (cachedResponse != null) { } else if (cachedResponse != null) {
this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true, false)); this.log.logWarning("HTCACHE contained response header, but not content for url " + url.toNormalform(true, false));
} else if (content != null) {
this.log.logWarning("HTCACHE contained content, but not response header for url " + url.toNormalform(true, false));
} }
} }