From 26a46b5521b3027212b03c1dd764f0383b67cc8a Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 25 May 2009 06:59:21 +0000 Subject: [PATCH] increased default maximum file size for database files to 2GB Other file sizes can now be configured with the attributes filesize.max.win and filesize.max.other the default maximum file size for non-windows OS is now 32GB git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5974 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- defaults/yacy.init | 6 ++++++ source/de/anomic/data/URLAnalysis.java | 2 +- source/de/anomic/kelondro/blob/BLOBArray.java | 5 ++--- .../de/anomic/kelondro/table/SplitTable.java | 2 +- .../kelondro/text/MetadataRepository.java | 1 - .../de/anomic/plasma/plasmaSwitchboard.java | 11 +++++----- source/de/anomic/plasma/plasmaWordIndex.java | 7 ++----- source/yacy.java | 20 +++++++++++++++++-- 8 files changed, 36 insertions(+), 18 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index cb6da4117..a040d1706 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -73,6 +73,12 @@ server.maxTrackingCount = 1000 # maximum number of hosts that are tracked server.maxTrackingHostCount = 100 +# maximum file sizes: since some users experience problems with too large files +# the file size of database files can be limited. Larger files can be used to get a +# better IO performance and to use less RAM; however, if the size must be limited +# because of limitations of the file system, the maximum size can be set here +filesize.max.win = 2147483647 +filesize.max.other = 34359738367 # Network Definition # There can be separate YaCy networks, and managed sub-groups of the general network. diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index b3a63123b..02e519522 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -483,7 +483,7 @@ public class URLAnalysis { } else if (args[0].equals("-diffurlcol") && args.length >= 3) { // make a diff-file that contains hashes from the url database that do not occur in the collection reference dump // example: - // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump + // java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump try { diffurlcol(args[1], args[2], args[3]); } catch (IOException e) { diff --git a/source/de/anomic/kelondro/blob/BLOBArray.java b/source/de/anomic/kelondro/blob/BLOBArray.java index 4de4c939d..702c32bf6 100755 --- a/source/de/anomic/kelondro/blob/BLOBArray.java +++ b/source/de/anomic/kelondro/blob/BLOBArray.java @@ -66,7 +66,6 @@ public class BLOBArray implements BLOB { */ public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L; - public static final long oneGigabyte = 1024L * 1024L * 1024L; private int keylength; private ByteOrder ordering; @@ -91,7 +90,7 @@ public class BLOBArray implements BLOB { this.buffersize = buffersize; this.heapLocation = heapLocation; this.fileAgeLimit = oneMonth; - this.fileSizeLimit = oneGigabyte; + this.fileSizeLimit = (long) Integer.MAX_VALUE; this.repositoryAgeMax = Long.MAX_VALUE; this.repositorySizeMax = Long.MAX_VALUE; @@ -327,7 +326,7 @@ public class BLOBArray implements BLOB { public void setMaxSize(long maxSize) { this.repositorySizeMax = maxSize; - this.fileSizeLimit = Math.min(oneGigabyte, maxSize / 10); + this.fileSizeLimit = Math.min((long) Integer.MAX_VALUE, maxSize / 10L); } private void executeLimits() { diff --git a/source/de/anomic/kelondro/table/SplitTable.java b/source/de/anomic/kelondro/table/SplitTable.java index 78deac567..991d38d31 100644 --- a/source/de/anomic/kelondro/table/SplitTable.java +++ b/source/de/anomic/kelondro/table/SplitTable.java @@ -90,7 +90,7 @@ public class SplitTable implements ObjectIndex { final String tablename, final Row rowdef, final boolean resetOnFail) { - this(path, tablename, rowdef, BLOBArray.oneMonth, BLOBArray.oneGigabyte, resetOnFail); + this(path, tablename, rowdef, BLOBArray.oneMonth, (long) Integer.MAX_VALUE, resetOnFail); } public SplitTable( diff --git a/source/de/anomic/kelondro/text/MetadataRepository.java b/source/de/anomic/kelondro/text/MetadataRepository.java index 05f301aed..96601a15d 100644 --- a/source/de/anomic/kelondro/text/MetadataRepository.java +++ b/source/de/anomic/kelondro/text/MetadataRepository.java @@ -68,7 +68,6 @@ public final class MetadataRepository implements Iterable { this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", URLMetadataRow.rowdef, false)); this.exportthread = null; // will have a export thread assigned if exporter is running this.statsDump = null; - } public void clearCache() { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 5c624478a..095bbfc9a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -170,6 +170,7 @@ import de.anomic.server.serverProcessorJob; import de.anomic.server.serverProfiling; import de.anomic.server.serverSemaphore; import de.anomic.server.serverSwitch; +import de.anomic.server.serverSystem; import de.anomic.server.serverThread; import de.anomic.tools.crypt; import de.anomic.tools.CryptoLib; @@ -319,8 +320,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false); long urlCounter = 0, wordCounter = 0; @@ -867,7 +875,15 @@ public final class yacy { try { Iterator> indexContainerIterator = null; if (resource.equals("all")) { - WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0); + WordIndex = new plasmaWordIndex( + "freeworld", + log, + indexPrimaryRoot, + indexSecondaryRoot, + 10000, + (long) Integer.MAX_VALUE, + 1, + 0); indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false); } int counter = 0;