mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
more generic cache methods
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2721 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
72482b1426
commit
0f10bdde22
|
@ -57,7 +57,6 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
|
|||
import de.anomic.htmlFilter.htmlFilterWriter;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.net.URL;
|
||||
import de.anomic.plasma.plasmaHTCache;
|
||||
import de.anomic.plasma.plasmaParserDocument;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.plasma.cache.IResourceInfo;
|
||||
|
|
|
@ -103,17 +103,19 @@ public final class plasmaHTCache {
|
|||
public final File cachePath;
|
||||
public final serverLog log;
|
||||
public static final HashSet filesInUse = new HashSet(); // can we delete this file
|
||||
public final boolean useTreeStorage;
|
||||
public String cacheLayout;
|
||||
public boolean cacheMigration;
|
||||
|
||||
private ResourceInfoFactory objFactory;
|
||||
private serverThread cacheScanThread;
|
||||
|
||||
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb, long preloadTime, boolean useTreeStorage) {
|
||||
public plasmaHTCache(File htCachePath, long maxCacheSize, int bufferkb, long preloadTime, String cacheLayout, boolean cacheMigration) {
|
||||
// this.switchboard = switchboard;
|
||||
|
||||
this.log = new serverLog("HTCACHE");
|
||||
this.cachePath = htCachePath;
|
||||
this.useTreeStorage = useTreeStorage;
|
||||
this.cacheLayout = cacheLayout;
|
||||
this.cacheMigration = cacheMigration;
|
||||
|
||||
// create the object factory
|
||||
this.objFactory = new ResourceInfoFactory();
|
||||
|
@ -661,23 +663,41 @@ public final class plasmaHTCache {
|
|||
if (port >= 0) {
|
||||
fileName.append('!').append(port);
|
||||
}
|
||||
File FileTree = new File(this.cachePath, fileName.toString() + path);
|
||||
|
||||
// generate cache path according to storage method
|
||||
if (cacheLayout.equals("tree")) {
|
||||
File FileTree = treeFile(fileName, path);
|
||||
if (cacheMigration) {
|
||||
moveCachedObject(hashFile(fileName, extention, url), FileTree);
|
||||
}
|
||||
return FileTree;
|
||||
}
|
||||
if (cacheLayout.equals("hash")) {
|
||||
File FileFlat = hashFile(fileName, extention, url);
|
||||
if (cacheMigration) {
|
||||
moveCachedObject(treeFile(fileName, path), FileFlat);
|
||||
}
|
||||
return FileFlat;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private File treeFile(StringBuffer fileName, String path) {
|
||||
return new File(this.cachePath, fileName.toString() + path);
|
||||
}
|
||||
|
||||
private File hashFile(StringBuffer fileName, String extention, URL url) {
|
||||
String urlHash = indexURL.urlHash(url);
|
||||
String hexHash = serverCodings.encodeHex(kelondroBase64Order.enhancedCoder.decode(urlHash));
|
||||
fileName.append('/').append(hexHash.substring(0,2)).append('/').append(hexHash.substring(2,4)).append('/').append(hexHash);
|
||||
StringBuffer f = new StringBuffer(18);
|
||||
f.append('/').append(hexHash.substring(0,2)).append('/').append(hexHash.substring(2,4)).append('/').append(hexHash);
|
||||
if (extention != null) {
|
||||
fileName.append(extention);
|
||||
}
|
||||
File FileFlat = new File(this.cachePath, fileName.toString());
|
||||
if (useTreeStorage) {
|
||||
moveCachedObject(FileFlat, FileTree);
|
||||
return FileTree;
|
||||
} else {
|
||||
moveCachedObject(FileTree, FileFlat);
|
||||
return FileFlat;
|
||||
}
|
||||
return new File(this.cachePath, fileName.toString() + f);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This is a helper function that extracts the Hash from the filename
|
||||
*/
|
||||
|
@ -922,7 +942,7 @@ public final class plasmaHTCache {
|
|||
private String name; // the name of the link, read as anchor from an <a>-tag
|
||||
private String nomalizedURLHash;
|
||||
private String nomalizedURLString;
|
||||
private int status; // cache load/hit/stale etc status
|
||||
//private int status; // cache load/hit/stale etc status
|
||||
private Date lastModified;
|
||||
private char doctype;
|
||||
private String language;
|
||||
|
@ -1013,6 +1033,14 @@ public final class plasmaHTCache {
|
|||
return this.nomalizedURLHash;
|
||||
}
|
||||
|
||||
public Date lastModified() {
|
||||
return this.lastModified;
|
||||
}
|
||||
|
||||
public String language() {
|
||||
return this.language;
|
||||
}
|
||||
|
||||
public plasmaCrawlProfile.entry profile() {
|
||||
return this.profile;
|
||||
}
|
||||
|
|
|
@ -449,8 +449,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
|
|||
}
|
||||
this.log.logInfo("HTCACHE Path = " + htCachePath.getAbsolutePath());
|
||||
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig("proxyCacheSize", "2")); // this is megabyte
|
||||
boolean useTreeStorage = getConfigBool("proxyCacheTree", true);
|
||||
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP, ramHTTP_time, useTreeStorage);
|
||||
String cacheLayout = getConfig("proxyCacheLayout", "tree");
|
||||
boolean cacheMigration = getConfigBool("proxyCacheMigration", true);
|
||||
this.cacheManager = new plasmaHTCache(htCachePath, maxCacheSize, ramHTTP, ramHTTP_time, cacheLayout, cacheMigration);
|
||||
|
||||
// make parser
|
||||
log.logConfig("Starting Parser");
|
||||
|
|
19
yacy.init
19
yacy.init
|
@ -145,19 +145,11 @@ messConfig = httpd.messages
|
|||
# to enable that function, set proxy=true
|
||||
proxy=true
|
||||
|
||||
# a path to the proxy's file cache.
|
||||
# a path to the file cache, used for the internal proxy and as crawl buffer
|
||||
# This will be used if the server is addressed as a proxy
|
||||
proxyCache = DATA/HTCACHE
|
||||
|
||||
# the proxy's maximum disc cache size in megabytes
|
||||
# there should be enough space for the browsing load of an internet cafe
|
||||
# running at 56kbit/s modem speed (this time not unusual)
|
||||
# during 3 days, 8 hours a day
|
||||
# necessary space = 3 * 8 * 60 * 60 * 56 / 8 = 604800 KB = ca. 590 MB
|
||||
# since 600 MB is not much these days (it's below one GB!)
|
||||
# we recommend using that space
|
||||
#proxyCacheSize = 600
|
||||
#for testing:
|
||||
# the maximum disc cache size for files in proxyCache in megabytes
|
||||
proxyCacheSize = 200
|
||||
|
||||
# use the mostly direct mapping of URLs to Filenames
|
||||
|
@ -173,7 +165,12 @@ proxyCacheSize = 200
|
|||
# files that are present under the previously used layout will be renamed
|
||||
# to the new location and thus be accessible immediately. so an accumulated
|
||||
# cache is still usable after the switch.
|
||||
proxyCacheTree = true
|
||||
# possible values are {tree, hash}
|
||||
proxyCacheLayout = tree
|
||||
|
||||
# the migration flag determines whether files cached under the other layout are moved to the selected layout when they are accessed
|
||||
proxyCacheMigration = true
|
||||
|
||||
|
||||
# the following mime-types are the whitelist for indexing
|
||||
#
|
||||
|
|
Loading…
Reference in New Issue
Block a user