mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Increased default maximum file size for database files to 2GB.
Other file sizes can now be configured with the attributes filesize.max.win and filesize.max.other. The default maximum file size for non-Windows operating systems is now 32GB. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5974 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
eb36c9a092
commit
26a46b5521
|
@ -73,6 +73,12 @@ server.maxTrackingCount = 1000
|
|||
# maximum number of hosts that are tracked
|
||||
server.maxTrackingHostCount = 100
|
||||
|
||||
# maximum file sizes: since some users experience problems with files that are too large,
|
||||
# the file size of database files can be limited. Larger files can be used to get a
|
||||
# better IO performance and to use less RAM; however, if the size must be limited
|
||||
# because of limitations of the file system, the maximum size can be set here
|
||||
filesize.max.win = 2147483647
|
||||
filesize.max.other = 34359738367
|
||||
|
||||
# Network Definition
|
||||
# There can be separate YaCy networks, and managed sub-groups of the general network.
|
||||
|
|
|
@ -483,7 +483,7 @@ public class URLAnalysis {
|
|||
} else if (args[0].equals("-diffurlcol") && args.length >= 3) {
|
||||
// make a diff-file that contains hashes from the url database that do not occur in the collection reference dump
|
||||
// example:
|
||||
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT used.dump diffurlcol.dump
|
||||
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -diffurlcol DATA/INDEX/freeworld/TEXT/METADATA used.dump diffurlcol.dump
|
||||
try {
|
||||
diffurlcol(args[1], args[2], args[3]);
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -66,7 +66,6 @@ public class BLOBArray implements BLOB {
|
|||
*/
|
||||
|
||||
public static final long oneMonth = 1000L * 60L * 60L * 24L * 365L / 12L;
|
||||
public static final long oneGigabyte = 1024L * 1024L * 1024L;
|
||||
|
||||
private int keylength;
|
||||
private ByteOrder ordering;
|
||||
|
@ -91,7 +90,7 @@ public class BLOBArray implements BLOB {
|
|||
this.buffersize = buffersize;
|
||||
this.heapLocation = heapLocation;
|
||||
this.fileAgeLimit = oneMonth;
|
||||
this.fileSizeLimit = oneGigabyte;
|
||||
this.fileSizeLimit = (long) Integer.MAX_VALUE;
|
||||
this.repositoryAgeMax = Long.MAX_VALUE;
|
||||
this.repositorySizeMax = Long.MAX_VALUE;
|
||||
|
||||
|
@ -327,7 +326,7 @@ public class BLOBArray implements BLOB {
|
|||
|
||||
public void setMaxSize(long maxSize) {
|
||||
this.repositorySizeMax = maxSize;
|
||||
this.fileSizeLimit = Math.min(oneGigabyte, maxSize / 10);
|
||||
this.fileSizeLimit = Math.min((long) Integer.MAX_VALUE, maxSize / 10L);
|
||||
}
|
||||
|
||||
private void executeLimits() {
|
||||
|
|
|
@ -90,7 +90,7 @@ public class SplitTable implements ObjectIndex {
|
|||
final String tablename,
|
||||
final Row rowdef,
|
||||
final boolean resetOnFail) {
|
||||
this(path, tablename, rowdef, BLOBArray.oneMonth, BLOBArray.oneGigabyte, resetOnFail);
|
||||
this(path, tablename, rowdef, BLOBArray.oneMonth, (long) Integer.MAX_VALUE, resetOnFail);
|
||||
}
|
||||
|
||||
public SplitTable(
|
||||
|
|
|
@ -68,7 +68,6 @@ public final class MetadataRepository implements Iterable<byte[]> {
|
|||
this.urlIndexFile = new Cache(new SplitTable(this.location, "urls", URLMetadataRow.rowdef, false));
|
||||
this.exportthread = null; // will have a export thread assigned if exporter is running
|
||||
this.statsDump = null;
|
||||
|
||||
}
|
||||
|
||||
public void clearCache() {
|
||||
|
|
|
@ -170,6 +170,7 @@ import de.anomic.server.serverProcessorJob;
|
|||
import de.anomic.server.serverProfiling;
|
||||
import de.anomic.server.serverSemaphore;
|
||||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.serverSystem;
|
||||
import de.anomic.server.serverThread;
|
||||
import de.anomic.tools.crypt;
|
||||
import de.anomic.tools.CryptoLib;
|
||||
|
@ -319,8 +320,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
// start indexing management
|
||||
log.logConfig("Starting Indexing Management");
|
||||
final String networkName = getConfig(plasmaSwitchboardConstants.NETWORK_NAME, "");
|
||||
final boolean useCommons = getConfigBool("index.storeCommons", false);
|
||||
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
|
||||
final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
|
||||
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
|
||||
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
|
||||
try {
|
||||
webIndex = new plasmaWordIndex(
|
||||
|
@ -329,7 +330,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
indexPrimaryPath,
|
||||
indexSecondaryPath,
|
||||
wordCacheMaxCount,
|
||||
useCommons,
|
||||
fileSizeMax,
|
||||
redundancy,
|
||||
paritionExponent);
|
||||
} catch (IOException e1) {
|
||||
|
@ -795,7 +796,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
final File indexPrimaryPath = getConfigPath(plasmaSwitchboardConstants.INDEX_PRIMARY_PATH, plasmaSwitchboardConstants.INDEX_PATH_DEFAULT);
|
||||
final File indexSecondaryPath = (getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, "").length() == 0) ? indexPrimaryPath : new File(getConfig(plasmaSwitchboardConstants.INDEX_SECONDARY_PATH, ""));
|
||||
final int wordCacheMaxCount = (int) getConfigLong(plasmaSwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);
|
||||
final boolean useCommons = getConfigBool("index.storeCommons", false);
|
||||
final long fileSizeMax = (serverSystem.isWindows) ? sb.getConfigLong("filesize.max.win", (long) Integer.MAX_VALUE) : sb.getConfigLong("filesize.max.other", (long) Integer.MAX_VALUE);
|
||||
final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
|
||||
final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
|
||||
try {
|
||||
|
@ -805,7 +806,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
|
|||
indexPrimaryPath,
|
||||
indexSecondaryPath,
|
||||
wordCacheMaxCount,
|
||||
useCommons,
|
||||
fileSizeMax,
|
||||
redundancy,
|
||||
paritionExponent);
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -41,7 +41,6 @@ import de.anomic.crawler.IndexingStack;
|
|||
import de.anomic.data.Blacklist;
|
||||
import de.anomic.htmlFilter.htmlFilterContentScraper;
|
||||
import de.anomic.http.httpdProxyCacheEntry;
|
||||
import de.anomic.kelondro.blob.BLOBArray;
|
||||
import de.anomic.kelondro.order.Base64Order;
|
||||
import de.anomic.kelondro.order.ByteOrder;
|
||||
import de.anomic.kelondro.text.BufferedIndex;
|
||||
|
@ -71,9 +70,7 @@ public final class plasmaWordIndex {
|
|||
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
|
||||
public static final int wCacheMaxChunk = 800; // maximum number of references for each urlhash
|
||||
public static final int lowcachedivisor = 900;
|
||||
public static final int maxCollectionPartition = 7; // should be 7
|
||||
public static final long targetFileSize = 100 * 1024 * 1024; // 100 MB
|
||||
public static final long maxFileSize = BLOBArray.oneGigabyte; // 1GB
|
||||
public static final long targetFileSize = 256 * 1024 * 1024; // 256 MB
|
||||
public static final int writeBufferSize = 4 * 1024 * 1024;
|
||||
|
||||
// the reference factory
|
||||
|
@ -120,7 +117,7 @@ public final class plasmaWordIndex {
|
|||
final File indexPrimaryRoot,
|
||||
final File indexSecondaryRoot,
|
||||
final int entityCacheMaxSize,
|
||||
final boolean useCommons,
|
||||
final long maxFileSize,
|
||||
final int redundancy,
|
||||
final int partitionExponent) throws IOException {
|
||||
|
||||
|
|
|
@ -676,7 +676,15 @@ public final class yacy {
|
|||
final int cacheMem = (int)(MemoryControl.maxMemory - MemoryControl.total());
|
||||
if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
|
||||
|
||||
final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
|
||||
final plasmaWordIndex wordIndex = new plasmaWordIndex(
|
||||
networkName,
|
||||
log,
|
||||
indexPrimaryRoot,
|
||||
indexSecondaryRoot,
|
||||
10000,
|
||||
(long) Integer.MAX_VALUE,
|
||||
0,
|
||||
0);
|
||||
final Iterator<ReferenceContainer<WordReference>> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA".getBytes(), false, false);
|
||||
|
||||
long urlCounter = 0, wordCounter = 0;
|
||||
|
@ -867,7 +875,15 @@ public final class yacy {
|
|||
try {
|
||||
Iterator<ReferenceContainer<WordReference>> indexContainerIterator = null;
|
||||
if (resource.equals("all")) {
|
||||
WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
|
||||
WordIndex = new plasmaWordIndex(
|
||||
"freeworld",
|
||||
log,
|
||||
indexPrimaryRoot,
|
||||
indexSecondaryRoot,
|
||||
10000,
|
||||
(long) Integer.MAX_VALUE,
|
||||
1,
|
||||
0);
|
||||
indexContainerIterator = WordIndex.index().references(wordChunkStartHash.getBytes(), false, false);
|
||||
}
|
||||
int counter = 0;
|
||||
|
|
Loading…
Reference in New Issue
Block a user