Increased the default maximum file size for database files to 2GB

Maximum file sizes can now be configured with the attributes
filesize.max.win and filesize.max.other;
the default maximum file size for non-Windows operating systems is now 32GB.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5974 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2009-05-25 06:59:21 +00:00
parent eb36c9a092
commit 26a46b5521
8 changed files with 36 additions and 18 deletions

View File

@ -73,6 +73,12 @@ server.maxTrackingCount = 1000
# maximum number of hosts that are tracked
server.maxTrackingHostCount = 100
# maximum file sizes: since some users experience problems with files that are
# too large, the size of database files can be limited. Larger files yield
# better IO performance and use less RAM; however, if the size must be limited
# because of file-system limitations, the maximum size can be set here
filesize.max.win = 2147483647
filesize.max.other = 34359738367
# Network Definition
# There can be separate YaCy networks, and managed sub-groups of the general network.

View File

@ -483,7 +483,7 @@ public class URLAnalysis {
} else if (args[0].equals("-diffurlcol") && args.length >= 3) {
// make a diff-file that contains hashes from the url database that do not occur in the collection reference dump
// example:
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump
try {
diffurlcol(args[1], args[2], args[3]);
} catch (IOException e) {

View File

@ -66,7 +66,6 @@ public class BLOBArray implements BLOB {
*/
public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L;
public static final long oneGigabyte = 1024L * 1024L * 1024L;
private int keylength;
private ByteOrder ordering;
@ -91,7 +90,7 @@ public class BLOBArray implements BLOB {
this.buffersize = buffersize;
this.heapLocation = heapLocation;
this.fileAgeLimit = oneMonth;
this.fileSizeLimit = oneGigabyte;
this.fileSizeLimit = (long) Integer.MAX_VALUE;
this.repositoryAgeMax = Long.MAX_VALUE;
this.repositorySizeMax = Long.MAX_VALUE;
@ -327,7 +326,7 @@ public class BLOBArray implements BLOB {
public void setMaxSize(long maxSize) {
this.repositorySizeMax = maxSize;
this.fileSizeLimit = Math.min(oneGigabyte, maxSize / 10);
this.fileSizeLimit = Math.min((long) Integer.MAX_VALUE, maxSize / 10L);
}
private void executeLimits() {

View File

@ -90,7 +90,7 @@ public class SplitTable implements ObjectIndex {
final String tablename,
final Row rowdef,
final boolean resetOnFail) {
this(path, tablename, rowdef, BLOBArray.oneMonth, BLOBArray.oneGigabyte, resetOnFail);
this(path, tablename, rowdef, BLOBArray.oneMonth, (long) Integer.MAX_VALUE, resetOnFail);
}
public SplitTable(

View File

@ -68,7 +68,6 @@ public final class MetadataRepository implements Iterable<byte[]> {
this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", URLMetadataRow.rowdef, false));
this.exportthread = null; // will have a export thread assigned if exporter is running
this.statsDump = null;
}
public void clearCache() {

View File

@ -170,6 +170,7 @@ import de.anomic.server.serverProcessorJob;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSemaphore;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverSystem;
import de.anomic.server.serverThread;
import de.anomic.tools.crypt;
import de.anomic.tools.CryptoLib;
@ -319,8 +320,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// start indexing management
log.logConfig("Starting Indexing Management");
final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, "");
final boolean useCommons = getConfigBool("index.storeCommons", false);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
try {
webIndex = new plasmaWordIndex(
@ -329,7 +330,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
indexPrimaryPath,
indexSecondaryPath,
wordCacheMaxCount,
useCommons,
fileSizeMax,
redundancy,
paritionExponent);
} catch (IOException e1) {
@ -795,7 +796,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
final File indexPrimaryPath = getConfigPath(plasmaSwitchboardConstants.INDEX_PRIMARY_PATH, plasmaSwitchboardConstants.INDEX_PATH_DEFAULT);
final File indexSecondaryPath = (getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, ""));
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
final boolean useCommons = getConfigBool("index.storeCommons", false);
final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
try {
@ -805,7 +806,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
indexPrimaryPath,
indexSecondaryPath,
wordCacheMaxCount,
useCommons,
fileSizeMax,
redundancy,
paritionExponent);
} catch (IOException e) {

View File

@ -41,7 +41,6 @@ import de.anomic.crawler.IndexingStack;
import de.anomic.data.Blacklist;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpdProxyCacheEntry;
import de.anomic.kelondro.blob.BLOBArray;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.text.BufferedIndex;
@ -71,9 +70,7 @@ public final class plasmaWordIndex {
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
public static final int lowcachedivisor = 900;
public static final int maxCollectionPartition = 7; // should be 7
public static final long targetFileSize = 100 * 1024 * 1024; // 100 MB
public static final long maxFileSize = BLOBArray.oneGigabyte; // 1GB
public static final long targetFileSize = 256 * 1024 * 1024; // 256 MB
public static final int writeBufferSize = 4 * 1024 * 1024;
// the reference factory
@ -120,7 +117,7 @@ public final class plasmaWordIndex {
final File indexPrimaryRoot,
final File indexSecondaryRoot,
final int entityCacheMaxSize,
final boolean useCommons,
final long maxFileSize,
final int redundancy,
final int partitionExponent) throws IOException {

View File

@ -676,7 +676,15 @@ public final class yacy {
final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
final plasmaWordIndex wordIndex = new plasmaWordIndex(
networkName,
log,
indexPrimaryRoot,
indexSecondaryRoot,
10000,
(long) Integer.MAX_VALUE,
0,
0);
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false);
long urlCounter = 0, wordCounter = 0;
@ -867,7 +875,15 @@ public final class yacy {
try {
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
if (resource.equals("all")) {
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
WordIndex = new plasmaWordIndex(
"freeworld",
log,
indexPrimaryRoot,
indexSecondaryRoot,
10000,
(long) Integer.MAX_VALUE,
1,
0);
indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false);
}
int counter = 0;