mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
ef8aea7f8d
Also used object properties on QueryParams instances, rather than using mutable class (static) properties.
1327 lines
56 KiB
Plaintext
1327 lines
56 KiB
Plaintext
###
|
||
### YaCy Init File
|
||
###
|
||
# These properties will be loaded upon installation.
|
||
# They are used only once for set-up.
|
||
# If you make changes to this file and want these to make any effect,
|
||
# you must delete the yacy.conf file in DATA/SETTINGS
|
||
|
||
# ----------------------------------------------------------------------------
|
||
# port number where the server should bind to
|
||
port = 8090
|
||
|
||
# optional ssl port (https port) the server should bind to
|
||
port.ssl = 8443
|
||
|
||
# port to listen for a shutdown signal ( -1 = disable use of a shutdown port, 8005 = recommended default )
|
||
port.shutdown = -1
|
||
|
||
# prefix for new default peer names
|
||
peernameprefix=_anon
|
||
|
||
# use UPnP [true/false]
|
||
upnp.enabled = true
|
||
# remote host on UPnP device (for more than one connection)
|
||
upnp.remoteHost =
|
||
|
||
#sometimes you may want yacy to bind to another port, than the one reachable from outside.
|
||
#then set bindPort to the port yacy should bind on, and port to the port, visible from outside
|
||
#to run yacy on port 8090, reachable from port 80, set bindPort=8090, port=80 and use
|
||
#iptables -t nat -A PREROUTING -p tcp -s 192.168.24.0/16 --dport 80 -j DNAT --to 192.168.24.1:8090
|
||
#(of course you need to customize the ips)
|
||
bindPort =
|
||
|
||
# SSL support:
|
||
#
|
||
# For a German manual see http://yacy-websuche.de/wiki/index.php/De:Interface%C3%9CberHTTPS
|
||
#
|
||
# English speaking user read below:
|
||
#
|
||
# With this you can access your peer using https://localhost:8443
|
||
#
|
||
# There are two possibilities to specify which certificate should
|
||
# be used by YaCy.
|
||
#
|
||
# 1) Create a new certificate:
|
||
#
|
||
# *) For testing purposes, you can create a keystore with a self-signed certificate,
|
||
# using the following command:
|
||
# C:\> keytool -keystore mySrvKeystore -genkey -keyalg RSA -alias mycert
|
||
#
|
||
# *) Then configure the keyStoreXXXX properties accordingly, e.g.
|
||
# keyStore = c:/yacy/DATA/SETTINGS/mySrvKeystore
|
||
# keyStorePassword = mypwd
|
||
#
|
||
# 2) Import an existing certificate:
|
||
#
|
||
# Alternatively you can import an existing certificate in pkcs12 format into
|
||
# the keystore.
|
||
#
|
||
# This can be done by setting the pkcs12XXX properties accordingly, e.g.
|
||
# pkcs12ImportFile = c:/temp/keystore.pkcs12
|
||
# pkcs12ImportPwd = test
|
||
#
|
||
# If the property keyStore is not specified, then a new keystore file
|
||
# DATA/SETTINGS/myPeerKeystore will be created.
|
||
|
||
keyStore=defaults/freeworldKeystore
|
||
keyStorePassword=freeworld
|
||
pkcs12ImportFile =
|
||
pkcs12ImportPwd =
|
||
|
||
# the keyStore is only used, if server.https is set to true
|
||
# if server.https=true, then the YaCy web interface is available at
|
||
# https://localhost:<port.ssl>/ and at http://localhost:<port>/
|
||
server.https=false
|
||
|
||
# property that collects the names of all servlets that had been used so far
|
||
# that is used to track if the user has already done some configuration steps
|
||
# if the user missed configuration steps that should be done, then a help system
|
||
# is possible which leads the user based on the list of servlets that had been used
|
||
# the list distinguishes called and submitted servlets
|
||
server.servlets.called =
|
||
server.servlets.submitted =
|
||
|
||
# server tracking: maximum time a track entry is held in the internal cache
|
||
# value is in milliseconds, default is one hour
|
||
server.maxTrackingTime = 3600000
|
||
|
||
# maximum number of tracks per host
|
||
server.maxTrackingCount = 1000
|
||
|
||
# maximum number of hosts that are tracked
|
||
server.maxTrackingHostCount = 100
|
||
|
||
# Global HTTP Referrer policy delivered by meta tag (see https://www.w3.org/TR/referrer-policy/ for available policies)
|
||
# Can be left empty : the browser should then fallback to the default "no-referrer-when-downgrade" policy
|
||
# Be careful, some policies will also affect YaCy internal links : "no-referrer", "same-origin", "origin" and "strict-origin". This can be useful
|
||
# for example when proxyURL.useforresults is set to true, to control the referrer information initially sent by YaCy proxy when visiting search results
|
||
referrer.meta.policy=origin-when-cross-origin
|
||
|
||
# maximum file sizes: since some users experience problems with too large files
|
||
# the file size of database files can be limited. Larger files can be used to get a
|
||
# better IO performance and to use less RAM; however, if the size must be limited
|
||
# because of limitations of the file system, the maximum size can be set here
|
||
filesize.max.win = 2147483647
|
||
filesize.max.other = 8589934591
|
||
|
||
# Network Definition
|
||
# There can be separate YaCy networks, and managed sub-groups of the general network.
|
||
# The essentials of the network definition are attached in separate property files.
|
||
# The property here can also be a url where the definition can be loaded.
|
||
# In case of privately managed networks, this configuration must be changed BEFORE it is released
|
||
# to the members of the separated network peers.
|
||
network.unit.definition = defaults/yacy.network.freeworld.unit
|
||
#network.unit.definition = defaults/yacy.network.webportal.unit
|
||
#network.unit.definition = defaults/yacy.network.intranet.unit
|
||
|
||
# distinguish intranet/internet IPs:
|
||
# if this setting is set to true, then only URL-Hashes with 'intranet'-Flag is created, even if the
|
||
# url is in the internet. This can be done to enhance the crawling speed dramatically since a DNS-lookup
|
||
# to check if a host is in the internet or an intranet can be omitted.
|
||
# This option is only valid if the network.unit.domain property is set to 'any'
|
||
network.unit.domain.nocheck = false
|
||
|
||
# in addition to non-dht networks a client may have its own agent name
|
||
# this option is only used if the value is non-empty and network.unit.dht = false
|
||
# that means it is not usable in YaCy p2p-configurations, only in private portal configurations
|
||
network.unit.tenant.agent =
|
||
|
||
# Update process properties
|
||
# The update server location is given in the network.unit.definition,
|
||
# but the settings for update processing and cycles are individual.
|
||
# the update process can be either 'manual' (no automatic lookup for new versions),
|
||
# 'guided' (automatic lookup, but user is asked before update is performed),
|
||
# or 'auto' (whenever an update is available, the update is loaded and installed)
|
||
update.process = manual
|
||
# the cycle value applies only if the process is automatic or guided. The value means hours.
|
||
# There is currently a fixed minimum number of hours of 24 hours for updates
|
||
update.cycle = 168
|
||
# a version number blacklist can restrict automatic or guided updates to a specific
|
||
# range of version numbers. The restriction is done with a blacklist (standard regexpr)
|
||
# It is recommended to set this list to low developer version numbers
|
||
update.blacklist =
|
||
# an update can also be restricted with a concept property, which can decide if an
|
||
# update is only valid if it either is a main release or any release including new development releases
|
||
# Valid keywords are 'main' and 'any'
|
||
update.concept = any
|
||
# the following values are set automatically:
|
||
# the lookup time when the last time a lookup to the network update server(s) was done
|
||
update.time.lookup = 0
|
||
# the download time when the last time a release was downloaded
|
||
update.time.download = 0
|
||
# the deploy time when the last update was done; milliseconds since epoch
|
||
update.time.deploy = 0
|
||
# delete old downloaded files after this amount of days to free disk space
|
||
# the latest release is always kept
|
||
update.deleteOld = 30
|
||
# only install signed files
|
||
update.onlySignedFiles = 1
|
||
|
||
# restart-option
|
||
# a peer can be re-started periodically
|
||
# restart.process can be either 'off' (no automatic restart) or 'time' (time- rule-based, see below)
|
||
restart.process = off
|
||
# the restart.cycle is the number of hours that must pass before a restart is done
|
||
restart.cycle = 20
|
||
# the restart.hour is a pattern that must match with the hour string (two-digit, 24h)
|
||
# when the restart should be performed
|
||
restart.hour = 03
|
||
# the following values are set automatically
|
||
restart.time = 0
|
||
|
||
# clusters within a network:
|
||
# every network can have an unlimited number of clusters. Clusters may be also completely
|
||
# sealed and have no connection to other peers. When a cluster does not use the
|
||
# p2p protocol and the bootstrapping mechanism to contact other peers, we call them
|
||
# Robinson peers. They can appear in different 'visibilities':
|
||
# - privatepeer: no connection and no data exchange to any other peer
|
||
# - privatecluster: connections only to self-defined addresses (other peers in same mode)
|
||
# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes
|
||
# - publicpeer: a single peer without cluster connection, but visible for p2p nodes
|
||
# all public robinson peers should use a peer tag string to be searchable if in the
|
||
# search request these tags appear
|
||
cluster.mode=publicpeer
|
||
cluster.peers.yacydomain=localpeer.yacy
|
||
cluster.peers.ipport=localhost:8090
|
||
|
||
# bootstrapLoadTimeout
|
||
# this is the time-out for loading of the seedlist files during bootstrapping
|
||
# If the time-out is too short, there is the danger that the peer stays in virgin mode
|
||
bootstrapLoadTimeout = 20000
|
||
|
||
# time-out of client control socket in milliseconds
|
||
# since this applies only to the client-proxy connection,
|
||
# it can be rather short
|
||
# milliseconds
|
||
clientTimeout = 10000
|
||
|
||
# maximal number of httpd sessions
|
||
# a client may open several connections at once, and the httpdMaxBusySessions value sets
|
||
# a limit on the number of concurrent connections
|
||
httpdMaxBusySessions = 200
|
||
|
||
# default root path for the file server
|
||
# may be overridden by the htdocs parameter
|
||
# users shall be encouraged to use the htdocs path for individual content,
|
||
# not this path defined here
|
||
htRootPath = htroot
|
||
|
||
# the htroot path
|
||
# root path for the httpd file server
|
||
htDefaultPath=htroot
|
||
|
||
# individual htroot folder
|
||
# every user may publicize her/his own web pages
|
||
# these pages shall be placed in the path defined here
|
||
# the htdocs path shares its content with the htroot path
|
||
htDocsPath = DATA/HTDOCS
|
||
|
||
# the default files (typically index.html), if no file name is given
|
||
# The complete path to this file is created by combination with the rootPath
|
||
# you can set a list of defaults, separated by comma
|
||
# the first one is preferred
|
||
defaultFiles = index.html,index.htm,default.html,search.html,console.html,control.html,welcome.html,wiki.html,forum.html,blog.html,email.html,content.html,monitor.html,share.html,dir.html,readme.txt
|
||
|
||
# locale-options: YaCy supports localization.
|
||
# Web pages for special languages are located in the htLocalePath
|
||
# The htLocaleLang defines a list of language options as <dir>/<named-language>
|
||
# the <dir> must exist as sub-path to htLocalePath
|
||
# the htLocaleSelection selects from the given locales, value=one-of-<dir>
|
||
locale.source=locales
|
||
locale.translated_html=DATA/LOCALE/htroot
|
||
locale.language=default
|
||
|
||
# virtual host for httpdFileServlet access
|
||
# for example http://<fileHost>/ shall access the file servlet and
|
||
# return the defaultFile at rootPath
|
||
# either way, http://<fileHost>/ denotes the same as http://localhost:<port>/
|
||
# for the preconfigured value 'localpeer', the URL is:
|
||
# http://localpeer/
|
||
fileHost = localpeer
|
||
|
||
# specify the path to the MIME matching file table
|
||
mimeTable = defaults/httpd.mime
|
||
|
||
# specify the path to the sessionid name file
|
||
sessionidNamesFile = defaults/sessionid.names
|
||
|
||
# a path to the file cache, used for the internal proxy and as crawl buffer
|
||
# This will be used if the server is addressed as a proxy
|
||
proxyCache = DATA/HTCACHE
|
||
|
||
# the maximum disc cache size for files in Cache in megabytes
|
||
# default: 4 Gigabyte
|
||
proxyCacheSize = 4096
|
||
|
||
# The compression level for cached content
|
||
# Supported values ranging from 0 - no compression (lower CPU, higher disk usage), to 9 - best compression (higher CPU, lower disk use)
|
||
proxyCache.compressionLevel = 9
|
||
|
||
# Timeout value (in milliseconds) for acquiring a synchronization lock on getContent/store Cache operations
|
||
# When timeout occurs, loader should fall back to regular remote resource loading
|
||
proxyCache.sync.lockTimeout = 2000
|
||
|
||
# you can use the proxy with fresh/stale rules or in a always-fresh mode
|
||
proxyAlwaysFresh = false
|
||
|
||
# a path to the surrogate input directory
|
||
surrogates.in = DATA/SURROGATES/in
|
||
|
||
# a path to the surrogate output directory
|
||
surrogates.out = DATA/SURROGATES/out
|
||
|
||
# a path to the dictionaries directory
|
||
# this directory also contains subdirectories for input sources, the did-you-mean function and other
|
||
dictionaries = DATA/DICTIONARIES
|
||
|
||
# a path to the classification directory
|
||
# each subdirectory is the name of a context (which becomes a navigator) with '.txt' files
|
||
# containing texts to teach a bayesian filter. One of the files must be named 'negative.txt'.
|
||
# The text files can be created with the Export functionality using the option "Only Text".
|
||
classification = DATA/CLASSIFICATION
|
||
|
||
# storage place for new releases
|
||
releases = DATA/RELEASE
|
||
|
||
# the following mime-types are a blacklist for indexing:
|
||
# parser.mime.deny: specifies mime-types that shall not be indexed
|
||
parser.mime.deny=
|
||
parser.extensions.deny=
|
||
parser.enableAudioTags=false
|
||
|
||
# experimental single-page parser for pdf files: split one pdf into individual pages;
|
||
# the key is the property name in the post arguments that gets a page number assigned,
|
||
# page numbers start with 1
|
||
parser.pdf.individualpages=false
|
||
parser.pdf.individualpages.key=page
|
||
|
||
# Promotion Strings
|
||
# These strings appear in the Web Mask of the YACY search client
|
||
# Set these Strings to customize your peer and give any message to
|
||
# other peer users
|
||
promoteSearchPageGreeting = Web Search by the People, for the People
|
||
# if the following property is set to true, the network name is used as greeting
|
||
promoteSearchPageGreeting.useNetworkName = false
|
||
# the following attributes can be used to define a custom image, alternative text and home page on the search page
|
||
promoteSearchPageGreeting.homepage = http://yacy.net
|
||
promoteSearchPageGreeting.imageAlt = YaCy project web site
|
||
promoteSearchPageGreeting.largeImage = env/grafics/YaCyLogo_120ppi.png
|
||
promoteSearchPageGreeting.smallImage = env/grafics/YaCyLogo_60ppi.png
|
||
|
||
# the path to the public reverse word index for text files (web pages)
|
||
# the primary path is relative to the data root, the secondary path is an absolute path
|
||
# when the secondary path should be equal to the primary, it must be declared empty
|
||
indexPrimaryPath=DATA/INDEX
|
||
|
||
# the path to index archive dumps
|
||
indexArchivePath=DATA/ARCHIVE
|
||
|
||
# the path to the LISTS files. Most lists are used to filter web content
|
||
listsPath=DATA/LISTS
|
||
|
||
# path to additional databases, like messages, blog data and bookmarks
|
||
workPath=DATA/WORK
|
||
|
||
# the path to the SKINS files.
|
||
skinPath=DATA/SKINS
|
||
|
||
# the yellow-list; URL's elements
|
||
# (the core of an URL; like 'yahoo' in 'de.yahoo.com')
|
||
# appearing in this list will not get a manipulated user agent string
|
||
proxyYellowList=yacy.yellow
|
||
|
||
# the black-list; URLs appearing in this list will not be loaded;
|
||
# instead always a 404 is returned
|
||
# all these files will be placed in the listsPath
|
||
BlackLists.Shared=url.default.black
|
||
BlackLists.DefaultList=url.default.black
|
||
|
||
#these are not needed as default. they just keep the values from being deleted ...
|
||
proxy.BlackLists=url.default.black
|
||
crawler.BlackLists=url.default.black
|
||
dht.BlackLists=url.default.black
|
||
search.BlackLists=url.default.black
|
||
surftips.BlackLists=url.default.black
|
||
news.BlackLists=url.default.black
|
||
|
||
# the blue-list;
|
||
# no search result is locally presented that has any word of the bluelist
|
||
# in the search words, the URL or the URL's description
|
||
plasmaBlueList=yacy.blue
|
||
|
||
# this proxy may in turn again access another proxy
|
||
# if you wish to do that, specify it here
|
||
# if you want to switch on the proxy use, set remoteProxyUse=true
|
||
# remoteProxyNoProxy is a no-proxy pattern list for the remote proxy
|
||
remoteProxyUse=false
|
||
remoteProxyUse4SSL=true
|
||
|
||
remoteProxyHost=192.168.2.2
|
||
remoteProxyPort=4239
|
||
remoteProxyUser=
|
||
remoteProxyPwd=
|
||
|
||
remoteProxyNoProxy=10\..*,127\..*,172\.(1[6-9]|2[0-9]|3[0-1])\..*,169\.254\..*,192\.168\..*,localhost,0:0:0:0:0:0:0:1
|
||
|
||
# the proxy may filter the content of transferred web pages
|
||
# the bluelist removes specific keywords from web pages
|
||
proxyBlueList=yacy.blue
|
||
|
||
# security settings
|
||
# we provide proxy and server security through a 2-stage security gate:
|
||
# 1st stage: firewall-like access control through ip filter for clients
|
||
# 2nd stage: password settings for proxy, server and server administrators
|
||
# by default, these settings are weak to simplify set-up and testing
|
||
# every user/administrator shall be encouraged to change these settings
|
||
# you can change them also online during run-time on
|
||
# http://localhost:8090/
|
||
|
||
# proxyClient: client-ip's that may connect the proxy for proxy service
|
||
# if several ip's are allowed then they must be separated by a ','
|
||
# regular expressions may be used
|
||
#proxyClient=192.168.0.4
|
||
proxyClient=localhost,127\.0\.0\.1,192\.168\..*,10\..*,0:0:0:0:0:0:0:1.*
|
||
|
||
# YaCyHop: allow public usage of proxy for yacy-protocol
|
||
# this enables usage of the internal http proxy for everyone,
|
||
# if the file path starts with /yacy/
|
||
# This is used to enable anonymization of yacy protocol requests
|
||
# Instead of asking a remote peer directly, a peer in between is asked
|
||
# to prevent that the asked peer knows which peer asks.
|
||
YaCyHop=true
|
||
|
||
# serverClient: client-ip's that may connect to the web server,
|
||
# thus are allowed to use the search service
|
||
# if you set this to another value, search requests from others
|
||
# are blocked, but you will also be blocked from using others
|
||
# search services.
|
||
serverClient=*
|
||
|
||
# use_proxyAccounts: set to true to restrict proxy-access to some identified users.
|
||
#use User_p.html to create some Users.
|
||
use_proxyAccounts=true
|
||
|
||
# adminAccountBase64MD5: an encoded user:password - pair for administration of
|
||
# settings through the web interface
|
||
# should be set to a secret. By default it is without a password
|
||
# but you are encouraged to set it to another value on the page
|
||
# http://localhost:8090/ConfigAccounts_p.html
|
||
#adminAccountBase64=MD5:{admin:realm:mysecretpassword}
|
||
adminAccountBase64MD5=
|
||
adminAccountUserName=admin
|
||
|
||
# special access handling for users from localhost:
|
||
# access from localhost may be granted with administration authority
|
||
# if this flag is set. It is set to true by default to make usage of YaCy easy
|
||
# if you use YaCy on a headless server, you should set this to false
|
||
# or configure this on http://localhost:8090/ConfigAccounts_p.html
|
||
# during the first 10 minutes of operation of YaCy;
|
||
# if the admin account password is still empty after 10 minutes a random
|
||
# password is generated and access is then ONLY from localhost, which will cause
|
||
# inaccessibility for installations on headless servers.
|
||
adminAccountForLocalhost=true
|
||
|
||
# adminAccountAllPages: if set to false, then all pages without the extension "_p" are
|
||
# accessible without authorization. Some servlets may individually decide to use or request
|
||
# administration rights. If adminAccountAllPages is set to true, then administration
|
||
# rights are needed to access all pages without any exception. Setting adminAccountAllPages
|
||
# to true therefore closes the YaCy web pages for everyone.
|
||
adminAccountAllPages=false
|
||
|
||
# adminRealm: an internal name (like a group name) for the login setting of the admin frontend
|
||
# ATTENTION: changing this name will invalidate all current password hashes
|
||
# - With DIGEST authentication mode this realm name is part of the generated password hashes
|
||
# (RFC 2617 standard and recommendation). If you want to share password configuration
|
||
# with additional machines they have to belong to the same realm
|
||
# - authentication defaults to BASIC
|
||
# - and can be configured in defaults/web.xml , tag <auth-method>
|
||
#adminRealm=YaCy-AdminUI
|
||
adminRealm=The YaCy access is limited to administrators. If you don't know the password, you can change it using <yacy-home>/bin/passwd.sh <new-password>
|
||
|
||
# if you are running a principal peer, you must update the following variables
|
||
# The upload method that should be used to upload the seed-list file to
|
||
# a public accessible webserver where it can be loaded by other peers.
|
||
#
|
||
# You can set the seedUploadMethod-Property to
|
||
# - None
|
||
# - Ftp
|
||
# - File
|
||
# - Scp (only if you have installed the optional addon)
|
||
#
|
||
seedUploadMethod=none
|
||
|
||
# This is the most common method to upload the seed-list
|
||
#
|
||
# This is an ftp account with all relevant information.
|
||
# The update is only made if there had been changes in between.
|
||
seedFTPServer=
|
||
seedFTPAccount=
|
||
seedFTPPassword=
|
||
seedFTPPath=
|
||
|
||
# alternatively to an FTP account, a peer can also become a principal peer
|
||
# if the seed-list can be generated as a file and that file is also accessible from
|
||
# the internet. In this case, omit any ftp settings and set this path here.
|
||
# if this path stays empty, an ftp account is considered
|
||
# however, you must always set a seedURL because it is used to check if the
|
||
# file is actually accessible from the internet
|
||
seedFilePath=
|
||
|
||
# Settings needed to upload the seed-list file via scp
|
||
#
|
||
# Please note that this upload method can only be used if you have installed
|
||
# this optional upload method.
|
||
seedScpServer=
|
||
seedScpServerPort=
|
||
seedScpAccount=
|
||
seedScpPassword=
|
||
seedScpPath=
|
||
|
||
# every peer periodically scans for other peers. you can set the time
|
||
# of the period here (minutes)
|
||
peerCycle=2
|
||
|
||
# debug flags
|
||
debug.search.local.dht.off=false
|
||
debug.search.local.solr.off=false
|
||
debug.search.remote.dht.off=false
|
||
debug.search.remote.dht.testlocal=false
|
||
debug.search.remote.solr.off=false
|
||
debug.search.remote.solr.testlocal=false
|
||
|
||
#staticIP if you have a static IP, you can use this setting
|
||
staticIP=
|
||
|
||
# each time YaCy starts up, it can trigger the local browser to show the
|
||
# status page. This is active by default, to make it easier for first-time
|
||
# users to understand what this application does. You can disable browser
|
||
# pop-up here or set a different start page, like the search page
|
||
browserPopUpTrigger=true
|
||
browserPopUpPage=index.html
|
||
|
||
# a forward page can be given for the index.html page
|
||
# when a user accesses the index.html page, he/she is forwarded to the page
|
||
# as given by indexForward. This is by default not defined which means 'no forward'
|
||
indexForward =
|
||
|
||
# defines if the YaCy icon appears in the system tray on supported platforms
|
||
tray.icon.enabled=true
|
||
tray.icon.force=false
|
||
tray.icon.label=YaCy
|
||
tray.menu.enabled=true
|
||
|
||
# index sharing attributes: by default, sharing is on.
|
||
# If you want to use YaCy only for local indexing (robinson mode),
|
||
# you may switch this off
|
||
allowDistributeIndex=true
|
||
allowDistributeIndexWhileCrawling=false
|
||
allowDistributeIndexWhileIndexing=true
|
||
allowReceiveIndex=true
|
||
allowReceiveIndex.search=true
|
||
indexReceiveBlockBlacklist=true
|
||
|
||
# the frequency is the number of links per minute, that the peer allows
|
||
# _every_ other peer to send to this peer
|
||
defaultWordReceiveFrequency=100
|
||
defaultLinkReceiveFrequency=30
|
||
# the default may be overridden for each peer individually, these
|
||
# settings are only available through the online interface
|
||
|
||
# prefetch parameters
|
||
# the prefetch depth assigns a specific depth to the prefetch mechanism
|
||
# prefetch of 0 means no prefetch; a prefetch of 1 means to prefetch all
|
||
# embedded URLs, but since embedded image links are loaded by the browser
|
||
# this means that only embedded anchors are prefetched additionally
|
||
# a prefetch of 2 would result in loading of all images and anchor pages
|
||
# of all embedded anchors. Be careful with this value, since even a prefetch
|
||
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
|
||
proxyPrefetchDepth=0
|
||
proxyStoreHTCache=true
|
||
proxyIndexingRemote=false
|
||
proxyIndexingLocalText=true
|
||
proxyIndexingLocalMedia=true
|
||
|
||
# proxy usage only for .yacy-Domains for autoconfig
|
||
proxyYacyOnly=false
|
||
|
||
# enable proxy via url (/proxy.html?url=http://yacy.net)
|
||
proxyURL=false
|
||
proxyURL.access=127.0.0.1,0:0:0:0:0:0:0:1
|
||
# which urls to rewrite to /proxy.html?url=x (values: all, domainlist)
|
||
proxyURL.rewriteURLs=domainlist
|
||
proxyURL.useforresults=false
|
||
|
||
# Autocrawl configuration
|
||
autocrawl=false
|
||
autocrawl.index.text=true
|
||
autocrawl.index.media=true
|
||
autocrawl.ratio=50
|
||
autocrawl.rows=100
|
||
autocrawl.days=1
|
||
autocrawl.query=*:*
|
||
autocrawl.deep.depth=3
|
||
autocrawl.shallow.depth=1
|
||
|
||
# From the 'IndexCreate' menu point you can also define a crawling start point.
|
||
# The crawling works the same way as the prefetch, but it is possible to
|
||
# assign a different crawling depth.
|
||
# Be careful with this number. Consider a branching factor of average 20;
|
||
# A prefetch-depth of 8 would index 25.600.000.000 pages, maybe the whole WWW.
|
||
crawlingDepth=3
|
||
crawlingDirectDocByURL=true
|
||
crawlingIfOlder=-1
|
||
crawlingDomFilterDepth=-1
|
||
crawlingDomMaxPages=-1
|
||
indexText=true
|
||
indexMedia=true
|
||
|
||
# Filter for crawling; may be used to restrict a crawl to a specific domain
|
||
# URLs are only indexed and further crawled if they match this filter
|
||
crawlingFilter=.*
|
||
crawlingQ=true
|
||
followFrames=true
|
||
obeyHtmlRobotsNoindex=true
|
||
obeyHtmlRobotsNofollow=false
|
||
storeHTCache=true
|
||
storeTXCache=true
|
||
|
||
# peers may initiate remote crawling tasks.
|
||
# every peer may allow or disallow to be used as crawling-peer;
|
||
# you can also set a maximum crawl depth that can be requested or accepted
|
||
# order=parameters for requester; response=parameters for responder
|
||
# these values apply only for senior-senior - communication
|
||
# The delay value is number of seconds between two separate orders
|
||
# crawlOrder: default value for remote crawl starts
|
||
# crawlResponse: set to true if a peer should retrieve remote crawl urls from other peers
|
||
crawlOrder=true
|
||
crawlOrderDepth=0
|
||
crawlResponse=false
|
||
crawlResponseDepth=0
|
||
|
||
# indexing-exclusion - rules
|
||
# These rules are important to reduce the number of words that are indexed
|
||
# We distinguish three different sets of stop-words:
|
||
# static - excludes all words given in the file yacy.stopwords from indexing,
|
||
# dynamic - excludes all words from indexing which are listed by statistic rules,
|
||
# parental - excludes all words from indexing which had been indexed in the parent web page.
|
||
xsstopw=true
|
||
xdstopw=true
|
||
xpstopw=true
|
||
|
||
# Topwords filtering
|
||
# If set to true, all stopwords (stopwords.yacy) are filtered from the topwords
|
||
# Change to false if requesting hits from peers with modified stopwords-file and using the unchanged client-version
|
||
filterOutStopwordsFromTopwords=true
|
||
|
||
# crawling steering: must-match/must-not-match
|
||
crawlingIPMustMatch=.*
|
||
crawlingIPMustNotMatch=
|
||
# the default country codes are all codes for countries in Europe
|
||
crawlingCountryMustMatch=AD,AL,AT,BA,BE,BG,BY,CH,CY,CZ,DE,DK,EE,ES,FI,FO,FR,GG,GI,GR,HR,HU,IE,IM,IS,IT,JE,LI,LT,LU,LV,MC,MD,MK,MT,NL,NO,PL,PT,RO,RU,SE,SI,SJ,SK,SM,TR,UA,UK,VA,YU
|
||
|
||
# collections for index data separation
|
||
# these collections can either be used to produce search tenants.
|
||
# The collection is used in the site-parameter in the GSA interface.
|
||
# Collections are assigned during crawl-time and defined in the crawl start.
|
||
# The YaCyScheme field collection_sxt must be switched on to use this field.
|
||
collection=user
|
||
|
||
# performance-settings
|
||
# delay-times for permanent loops (milliseconds)
|
||
# the idlesleep is the pause that a process sleeps if the last call to the
|
||
# process job was without execution of anything;
|
||
# the busysleep is the pause after a full job execution
|
||
# the prereq-value is a memory pre-requisite: that much bytes must
|
||
# be available/free in the heap; otherwise the loop is not executed
|
||
# and another idlesleep is performed
|
||
20_dhtdistribution_idlesleep=30000
|
||
20_dhtdistribution_busysleep=15000
|
||
20_dhtdistribution_memprereq=12582912
|
||
20_dhtdistribution_loadprereq=2.0
|
||
30_peerping_idlesleep=30000
|
||
30_peerping_busysleep=30000
|
||
30_peerping_memprereq=2097152
|
||
30_peerping_loadprereq=4.0
|
||
40_peerseedcycle_idlesleep=1800000
|
||
40_peerseedcycle_busysleep=1200000
|
||
40_peerseedcycle_memprereq=4194304
|
||
40_peerseedcycle_loadprereq=2.0
|
||
50_localcrawl_idlesleep=2000
|
||
50_localcrawl_busysleep=10
|
||
50_localcrawl_memprereq=25165824
|
||
50_localcrawl_loadprereq=6.0
|
||
50_localcrawl_isPaused=false
|
||
55_autocrawl_idlesleep=10000
|
||
55_autocrawl_busysleep=10000
|
||
55_autocrawl_memprereq=25165824
|
||
55_autocrawl_loadprereq=6.0
|
||
60_remotecrawlloader_idlesleep=4000
|
||
60_remotecrawlloader_busysleep=800
|
||
60_remotecrawlloader_memprereq=12582912
|
||
60_remotecrawlloader_loadprereq=8.0
|
||
60_remotecrawlloader_isPaused=false
|
||
62_remotetriggeredcrawl_idlesleep=2000
|
||
62_remotetriggeredcrawl_busysleep=200
|
||
62_remotetriggeredcrawl_memprereq=12582912
|
||
62_remotetriggeredcrawl_loadprereq=8.0
|
||
62_remotetriggeredcrawl_isPaused=false
|
||
70_surrogates_idlesleep=10000
|
||
70_surrogates_busysleep=0
|
||
70_surrogates_memprereq=12582912
|
||
70_surrogates_loadprereq=8.0
|
||
720_ccimport_idlesleep=100
|
||
720_ccimport_busysleep=1000
|
||
720_ccimport_memprereq=1048576
|
||
720_ccimport_loadprereq=8.0
|
||
730_ccfilter_idlesleep=100
|
||
730_ccfilter_busysleep=1000
|
||
730_ccfilter_memprereq=1048576
|
||
730_ccfilter_loadprereq=8.0
|
||
|
||
85_scheduler_idlesleep=60000
|
||
85_scheduler_busysleep=60000
|
||
85_scheduler_memprereq=1048576
|
||
85_scheduler_loadprereq=4.0
|
||
90_cleanup_idlesleep=300000
|
||
90_cleanup_busysleep=300000
|
||
90_cleanup_memprereq=0
|
||
90_cleanup_loadprereq=16.0
|
||
|
||
reindexSolr_idlesleep=1000
|
||
reindexSolr_busysleep=1
|
||
reindexSolr_memprereq=10485760
|
||
reindexSolr_loadprereq=9.0
|
||
|
||
# additional attributes:
|
||
# performanceIO is a percent-value. a value of 10 means, that 10% of the busysleep time
|
||
# is used to flush the RAM cache, which is the major part of the IO in YaCy
|
||
performanceProfile=defaults/yacy.init
|
||
performanceSpeed=100
|
||
performanceIO=10
|
||
|
||
# cleanup-process:
|
||
# properties for tasks that are performed during cleanup
|
||
cleanup.deletionProcessedNews = true
|
||
cleanup.deletionPublishedNews = true
|
||
cleanup.failedSearchURLtimeout = 86400000
|
||
|
||
|
||
# default memory settings for startup of yacy
|
||
# is valid in unix/shell and windows environments but
|
||
# not for first startup of YaCy
|
||
|
||
# -Xmx<size> and -Xms<size> maximum/init Java heap size
|
||
# if a high performance for large search indexes is wanted, then setting the values to equal number is recommended
|
||
# if YaCy shall be nice in not-only-yacy environments, then the Xms value may be lower
|
||
javastart_Xmx=Xmx600m
|
||
javastart_Xms=Xms90m
|
||
|
||
# YaCy is able to use RAM copies of database tables. This needs a lot of RAM.
|
||
# To switch on copying of file tables into RAM, there must be enough memory
|
||
# The memory that is available at startup time is used to switch the feature on
|
||
# The tableCachingLimit is the amount of free RAM at startup time to switch on the feature
|
||
tableCachingLimit=419430400
|
||
|
||
# some java versions may be limited to a specific array size
|
||
# of 134217727 entries. To prevent that tables of that size are generated,
|
||
# set this property to false
|
||
# If you want to have better performance and switch ramcopy on, try also to
|
||
# set this property to true
|
||
# this value is automatically set to true, if more than two gigabyte is available
|
||
exceed134217727=false
|
||
|
||
# priority of the yacy-process
|
||
# is valid in unix/shell and windows environments but
|
||
# not for first startup of YaCy
|
||
# UNIX: corresponds to the nice-level
|
||
# WIN: -20=realtime;-15=high;-10=above;0=normal;10=below;20=low
|
||
javastart_priority=10
|
||
|
||
# performance properties for the word index cache
|
||
# wordCacheMaxCount is the number of word indexes that shall be held in the
|
||
# ram cache during indexing. If you want to increase indexing speed, increase this
|
||
# value, e.g. up to one million, but increase also the memory limit to a minimum of 2GB
|
||
wordCacheMaxCount = 50000
|
||
|
||
# Specifies if yacy can be used as transparent http proxy.
|
||
#
|
||
# Please note that you also have to reconfigure your firewall
|
||
# before you can use yacy as transparent proxy. On linux this
|
||
# can be done like this:
|
||
# iptables -t nat -A PREROUTING -p tcp -s 192.168.0.0/16 \
|
||
# --dport 80 -j DNAT --to 192.168.0.1:8090
|
||
#
|
||
# With this iptables filter listed above all http traffic that
|
||
# comes from your private network (in this case 192.168.0.0)
|
||
# and goes to any webserver listening on port 80 will be forwarded
|
||
# by the firewall to yacy running on port 8090 (192.168.0.1:8090)
|
||
isTransparentProxy=false
|
||
|
||
# Specifies the timeout the proxy should use
|
||
proxy.clientTimeout = 60000
|
||
|
||
# Specifies if the proxy should send the via header according to RFC
|
||
proxy.sendViaHeader=true
|
||
|
||
# Specifies if the proxy should send the X-Forwarded-For header
|
||
proxy.sendXForwardedForHeader=true
|
||
|
||
# Enable cookie monitoring
|
||
proxy.monitorCookies=false
|
||
|
||
# msgForwarding: Specifies if yacy should forward received messages via
|
||
# email to the configured email address
|
||
msgForwardingEnabled=false
|
||
msgForwardingCmd=/usr/sbin/sendmail
|
||
msgForwardingTo=root@localhost
|
||
|
||
#crawlPause: delay time after specific functions before crawling is resumed
|
||
crawlPause.proxy=10
|
||
crawlPause.localsearch=50
|
||
crawlPause.remotesearch=10
|
||
|
||
# Some configuration values for the crawler
|
||
crawler.clientTimeout=30000
|
||
|
||
# http crawler specific settings; size in bytes
|
||
crawler.http.accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
|
||
crawler.http.acceptEncoding=gzip
|
||
crawler.http.acceptLanguage=en-us,en;q=0.5
|
||
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
||
crawler.http.maxFileSize=10485760
|
||
crawler.http.FollowRedirects=true
|
||
crawler.http.RecordRedirects=false
|
||
|
||
# ftp crawler specific settings; size in bytes
|
||
crawler.ftp.maxFileSize=10485760
|
||
|
||
# smb crawler specific settings: maximum size
|
||
crawler.smb.maxFileSize=100000000
|
||
|
||
# file crawler specific settings: maximum size
|
||
crawler.file.maxFileSize=100000000
|
||
|
||
# maximum number of crawler threads
|
||
crawler.MaxActiveThreads = 200
|
||
|
||
# maximum number of same hosts in crawler threads
|
||
crawler.MaxSameHostInQueue = 20
|
||
|
||
# default latency is the start value of the average of remote server response time
|
||
crawler.defaultAverageLatency = 500
|
||
|
||
# the latency factor is a factor that is applied to the average remote server latency.
|
||
# The result is the minimum remote server access delay time
|
||
crawler.latencyFactor = 0.5
|
||
|
||
# The onDemandLimit is the maximum number of crawl queues that are concurrently opened
|
||
# at the same time. If the number of hosts exceeds this number, onDemand queues are opened
|
||
# which are opened each time a queue is accessed which creates high IO load. On the other
|
||
# hand, having too many entries in onDemandLimit may exceed the maximum number of file
|
||
# pointers. You can increase this number in /proc/sys/fs/file-max and adapt it to the number
|
||
# defined here
|
||
crawler.onDemandLimit = 1000
|
||
|
||
# maximum size of indexing queue
|
||
indexer.slots = 100
|
||
|
||
# maximum size of stacker queue
|
||
stacker.slots = 2000
|
||
|
||
# search options: show advanced options on main search page
|
||
search.options = true
|
||
|
||
# search domains. If set to false then that search is not available
|
||
search.text = true
|
||
search.image = true
|
||
search.audio = false
|
||
search.video = false
|
||
search.app = false
|
||
|
||
# number of search results per page displayed by default
|
||
search.items = 10
|
||
|
||
# target for search results; this is the href target attribute inside every search result link
|
||
# possible values:
|
||
# "_blank" (new window), "_self" (same window), "_parent" (the parent frame of a frameset),
|
||
# "_top" (top of all frames), "searchresult" (a default custom page name for search results)
|
||
# a special pattern can be given for exceptions to the default target according to urls
|
||
search.target = _self
|
||
search.target.special = _self
|
||
search.target.special.pattern =
|
||
|
||
# When true, override the global referrer.meta.policy value and add the standard noreferrer link type to search results links :
|
||
# this instructs the browser that it should not send any referrer information at all when visiting them
|
||
# Be careful : some websites might reject requests with no referrer
|
||
# Supported by more browsers than the meta referrer tag
|
||
search.result.noreferrer=false
|
||
|
||
# search result lines may show additional information for each search hit
|
||
# these information pieces may be switched on or off
|
||
search.result.show.keywords = false
|
||
search.result.show.date = true
|
||
search.result.show.size = false
|
||
search.result.show.metadata = false
|
||
search.result.show.parser = false
|
||
search.result.show.citation = true
|
||
search.result.show.pictures = false
|
||
search.result.show.cache = true
|
||
search.result.show.proxy = false
|
||
search.result.show.hostbrowser = true
|
||
search.result.show.vocabulary = false
|
||
search.result.show.vocabulary.omit =
|
||
search.result.show.snapshots = false
|
||
# when true, display the raw ranking score value
|
||
search.result.show.ranking = false
|
||
|
||
|
||
# search navigators: comma-separated list of default values for search navigation.
|
||
# can be temporarily different if the search string is given with different navigation values
|
||
# assigning no value(s) means that no navigation is shown
|
||
search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language
|
||
#search.navigation=location,hosts,authors,namespace,topics,filetype,protocol,language,collections,date,year,year:dates_in_content_dts:Event
|
||
|
||
# max number of items displayed in search navigators
|
||
search.navigation.maxcount=100
|
||
|
||
# max number of items displayed in the dates navigator
|
||
search.navigation.dates.maxcount=640
|
||
|
||
# search result verification and snippet fetch caching rules
|
||
# each search result can be verified by loading the link from the web
|
||
# this can be enhanced using a cache. In some cases it may be appropriate
|
||
# to not verify the link at all and do not compute a snippet
|
||
# the possible cases are:
|
||
# nocache: no use of web cache, load all snippets online
|
||
# iffresh: use the cache if the cache exists and is fresh otherwise load online
|
||
# ifexist: use the cache if the cache exist or load online
|
||
# cacheonly: never go online, use all content from cache. If no cache entry exist,
|
||
# consider content nevertheless as available and show result without snippet
|
||
# false: no link verification and no snippet generation:
|
||
# all search results are valid without verification
|
||
search.verify = ifexist
|
||
|
||
search.excludehosts=
|
||
search.excludehosth=
|
||
|
||
# in case that a link verification fails then the corresponding index reference can be
|
||
# deleted to clean up the index. If this property is set then failed index verification in
|
||
# the cases of nocache, iffresh and ifexist causes an index deletion
|
||
search.verify.delete = true
|
||
|
||
# If enabled, the results are sorted in the browser using Javascript.
|
||
# This usually improves ranking accuracy, but doesn't work well for users
|
||
# who have Javascript disabled, are using screen readers, or are on slow computers.
|
||
search.jsresort = false
|
||
|
||
# remote search details
|
||
remotesearch.maxcount = 10
|
||
remotesearch.maxtime = 3000
|
||
remotesearch.result.store=true
|
||
# Maximum size allowed (in bytes) for a remote document result to be stored to local index. Defaults to -1, which means no limit.
|
||
remotesearch.result.store.maxsize=-1
|
||
remotesearch.maxload.rwi=8.0
|
||
remotesearch.maxload.solr=4.0
|
||
|
||
# Control whether remote Solr instances responses should be binary encoded. Responses are transferred as XML when set to false.
|
||
remote.solr.binaryResponse.enabled=true
|
||
|
||
# specifies if yacy should set its own referer if no referer URL
|
||
# was set by the client.
|
||
useYacyReferer = false
|
||
|
||
# specifies if the http post body should be transferred
|
||
# using content-encoding gzip during index transfer
|
||
# a) indexDistribution: which is done periodically if you have enabled
|
||
# Index Distribution via IndexControl_p.html
|
||
# b) indexTransfer: which can be used to transfer the whole index of a peer
|
||
# this can be started via IndexTransfer_p.html
|
||
# c) indexControl: which can be triggered manually via IndexControl_p.html to
|
||
# transfer a chosen subset of the peer index
|
||
indexDistribution.gzipBody = true
|
||
indexTransfer.gzipBody = true
|
||
indexControl.gzipBody = true
|
||
|
||
# defining timeouts for index- transfer/distribution/control
|
||
indexControl.timeout = 60000
|
||
indexDistribution.timeout = 60000
|
||
indexTransfer.timeout = 120000
|
||
|
||
# defining max. allowed amount of open files during index- transfer/distribution
|
||
indexDistribution.maxOpenFiles = 800
|
||
indexTransfer.maxOpenFiles = 800
|
||
|
||
# sizes for index distribution
|
||
indexDistribution.minChunkSize = 10
|
||
indexDistribution.maxChunkSize = 1000
|
||
indexDistribution.startChunkSize = 200
|
||
indexDistribution.maxChunkFails = 1
|
||
|
||
# limit of references per term & blob to the youngest of this value
|
||
# a value of <= 0 disables this feature (no limit)
|
||
# a value of e.g. 100000 can improve stability and reduce load while searching very popular words
|
||
index.maxReferences = 0
|
||
|
||
# Search sequence settings
|
||
# collection:
|
||
# time = time to get a RWI out of RAM cache, assortments and WORDS files
|
||
# count = maximum number of RWI-entries that shall be collected
|
||
#
|
||
# join:
|
||
# time = time to perform the join between all collected RWIs
|
||
# count = maximum number of entries that shall be joined
|
||
#
|
||
# presort:
|
||
# time = time to do a sort of the joined URL-records
|
||
# count = maximum number of entries that shall be pre-sorted
|
||
#
|
||
# urlfetch:
|
||
# time = time to fetch the real URLs from the LURL database
|
||
# count = maximum number of urls that shall be fetched
|
||
#
|
||
# postsort:
|
||
# time = time for final sort of URLs
|
||
# count = maximum number of URLs that shall be retrieved during sort
|
||
#
|
||
# filter:
|
||
# time = time to filter out unwanted urls (like redundant urls)
|
||
# count = maximum number of urls that shall be filtered
|
||
#
|
||
# snippetfetch:
|
||
# time = time to fetch snippets for selected URLs
|
||
# count = maximum number of snippets to be fetched
|
||
#
|
||
# all values are percent
|
||
# time-percent is the percent of total search time
|
||
# count-percent is the percent of total wanted urls in result
|
||
# we distinguish local and remote search times
|
||
searchProcessLocalTime_c = 44
|
||
searchProcessLocalCount_c = 10000000
|
||
searchProcessLocalTime_j = 8
|
||
searchProcessLocalCount_j = 1000000
|
||
searchProcessLocalTime_r = 8
|
||
searchProcessLocalCount_r =100000
|
||
searchProcessLocalTime_u = 20
|
||
searchProcessLocalCount_u = 10000
|
||
searchProcessLocalTime_o = 10
|
||
searchProcessLocalCount_o = 100
|
||
searchProcessLocalTime_f = 5
|
||
searchProcessLocalCount_f = 100
|
||
searchProcessLocalTime_s = 5
|
||
searchProcessLocalCount_s = 30
|
||
|
||
searchProcessRemoteTime_c = 44
|
||
searchProcessRemoteCount_c = 1000000
|
||
searchProcessRemoteTime_j = 8
|
||
searchProcessRemoteCount_j = 1000000
|
||
searchProcessRemoteTime_r = 8
|
||
searchProcessRemoteCount_r = 1000
|
||
searchProcessRemoteTime_u = 20
|
||
searchProcessRemoteCount_u = 1000
|
||
searchProcessRemoteTime_o = 10
|
||
searchProcessRemoteCount_o = 1000
|
||
searchProcessRemoteTime_f = 5
|
||
searchProcessRemoteCount_f = 100
|
||
searchProcessRemoteTime_s = 5
|
||
searchProcessRemoteCount_s = 10
|
||
|
||
# timeouts for snippet fetching in ms
|
||
# timeout_text is for text-snippets, timeout_media for media, e.g. images
|
||
timeout_text = 10000
|
||
timeout_media = 15000
|
||
|
||
# a list of domain name patterns that should not be cached by the httpc dns cache
|
||
httpc.nameCacheNoCachingPatterns = .*.ath.cx,.*.blogdns.*,.*.boldlygoingnowhere.org,.*.dnsalias.*,.*.dnsdojo.*,.*.dvrdns.org,.*.dyn-o-saur.com,.*.dynalias.*,.*.dyndns.*,.*.ftpaccess.cc,.*.game-host.org,.*.game-server.cc,.*.getmyip.com,.*.gotdns.*,.*.ham-radio-op.net,.*.hobby-site.com,.*.homedns.org,.*.homeftp.*,.*.homeip.net,.*.homelinux.*,.*.homeunix.*,.*.is-a-chef.*,.*.is-a-geek.*,.*.kicks-ass.*,.*.merseine.nu,.*.mine.nu,.*.myphotos.cc,.*.podzone.*,.*.scrapping.cc,.*.selfip.*,.*.servebbs.*,.*.serveftp.*,.*.servegame.org,.*.shacknet.nu
|
||
|
||
#externalRedirectors
|
||
#squid Redirector compatible
|
||
externalRedirector=
|
||
|
||
# the Yacy Version this config was created with
|
||
Version=
|
||
# old version value (keep to allow conversion of .conf, until next main release > 1.83)
|
||
svnRevision=0
|
||
|
||
currentSkin=pdbootstrap
|
||
|
||
# flag to show if pages shall be usable for non-admin users
|
||
# this can be applied to the Surftips.html, yacysearch.html and HostBrowser.html pages
|
||
publicSurftips = true
|
||
publicSearchpage = true
|
||
|
||
# flag to show if the top navigation bar shall be shown to all users
|
||
# if this is disabled, then the user must navigate manually from the search page
|
||
# to /Status.html to get the main menu bar back
|
||
publicTopmenu = true
|
||
|
||
# Wiki access rights
|
||
# the built-in wiki system allows by default only that the administrator is allowed to make changes
|
||
# this can be changed. There are three options:
|
||
# admin - only the admin has write right
|
||
# all - everybody has write right
|
||
# user - the admin and every user registered in the user db has write right
|
||
WikiAccess = admin
|
||
|
||
# Search Profiles
|
||
# we will support different search profiles
|
||
# If this profile setting is empty, a hard-coded profile is used to initialise the values
|
||
search.ranking.rwi.profile =
|
||
# The boost fields contain all fields which shall be searched together with a boost. Non-mentioned fields are not searched.
|
||
|
||
# Boost queries are added to all queries; functions evaluate a value which is either added or multiplied with the ranking.
|
||
# The field boostfunctionmode can be either 'add' or 'multiply' to describe the mode.
|
||
# All boost methods > 0 must have names to be able to select this name with a query, with the syntax /name
|
||
# The boostfields setting is of special importance as these are the fields used to query for search terms
|
||
search.ranking.solr.collection.boostname.tmpa.0=Default Profile
|
||
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0
|
||
search.ranking.solr.collection.filterquery.tmpa.0=
|
||
search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
|
||
search.ranking.solr.collection.boostfunction.tmpb.0=
|
||
search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
|
||
search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1
|
||
search.ranking.solr.collection.filterquery.tmpa.1=
|
||
search.ranking.solr.collection.boostquery.tmpa.1=
|
||
search.ranking.solr.collection.boostfunction.tmpb.1=recip(ms(NOW,last_modified),3.16e-11,1,1)
|
||
search.ranking.solr.collection.boostname.tmpa.2=Intranet Profile: when a search is done on a single domain only, i.e. if a site:-operator is used
|
||
search.ranking.solr.collection.boostfields.tmpa.2=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,h3_txt^2.0,keywords^2.0,description_txt^1.5,author^1.0
|
||
search.ranking.solr.collection.filterquery.tmpa.2=
|
||
search.ranking.solr.collection.boostquery.tmpa.2=fuzzy_signature_unique_b:true^10.0
|
||
search.ranking.solr.collection.boostfunction.tmpb.2=
|
||
search.ranking.solr.collection.boostname.tmpa.3=_unused3
|
||
search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
|
||
search.ranking.solr.collection.filterquery.tmpa.3=
|
||
search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
|
||
search.ranking.solr.collection.boostfunction.tmpb.3=
|
||
|
||
# the following values are used to identify duplicate content
|
||
search.ranking.solr.doubledetection.minlength=3
|
||
search.ranking.solr.doubledetection.quantrate=0.5f
|
||
|
||
# Another attribute for double content is a 'greedy' assumption that a http url is present for each https url and vice versa
|
||
# The same may be true for documents with leading 'www.' subdomain and without.
|
||
# The following attributes will cause that https is preferred over http and with-www is preferred over without-www
|
||
search.ranking.uniqueheuristic.preferhttps = false
|
||
search.ranking.uniqueheuristic.preferwwwprefix = true
|
||
|
||
#optional extern thumbnail program.
|
||
#the program must accept the invocation PROGRAM http://url /path/to/filename
|
||
thumbnailProgram =
|
||
|
||
# settings for the peer's local robots.txt
|
||
# the following restrictions are possible (comma-separated):
|
||
# - all : entire domain is disallowed
|
||
# - blog : the blog-pages
|
||
# - bookmarks : the bookmark-page
|
||
# - dirs : all directories in htroot (standard setting, as there is no usable information in)
|
||
# - fileshare : all files in the peer's file share (DATA/HTDOCS/share)
|
||
# - homepage : all files on the peer's home page (DATA/HTDOCS/www)
|
||
# - locked : all servlets ending on '_p.*' (standard setting, as robots would need a password to access them anyways)
|
||
# - news : the news-page
|
||
# - network : the network-pages
|
||
# - status : peer's status page
|
||
# - surftips : the surftips-page
|
||
# - wiki : the wiki-page
|
||
httpd.robots.txt = locked,dirs,bookmarks,network,news,status,profile
|
||
|
||
# maximum number of robots.txt loading threads
|
||
robots.txt.MaxActiveThreads = 200
|
||
|
||
# class to use for parsing wikicode
|
||
wikiParser.class = de.anomic.data.wikiCode
|
||
|
||
# settings for automatic deletion of old entries in passive and potential seed-db
|
||
# time means max time (in days) a peer may not have been seen before it is deleted
|
||
routing.deleteOldSeeds.permission = true
|
||
routing.deleteOldSeeds.time = 30
|
||
|
||
# options to remember the default search engines when using the search compare features
|
||
compare_yacy.left = YaCy
|
||
compare_yacy.right = startpage.com
|
||
|
||
# minimum free disk space for crawling (MiB)
|
||
disk.free = 3000
|
||
# minimum for DHT
|
||
disk.free.hardlimit = 1000
|
||
|
||
# ResourceObserver settings
|
||
# We apply the naming of control circuit states to resources observer limit values (steady-state value, over/undershot)
|
||
# under/overshot states in the system are supposed to be regulated to match the steady-state value
|
||
|
||
# autoregulation of resource states
|
||
# ATTENTION: be aware that using the autoregulate-option causes that the search index data is DELETED as soon as threshold-values are reached!
|
||
# the autoregulate function starts working if resources reach over/undershot values and the auto-regulation tries to regulate to the steadystate value
|
||
resource.disk.free.autoregulate=true
|
||
resource.disk.used.autoregulate=false
|
||
|
||
# the target steady-state of minimum disk space left (MB)
|
||
resource.disk.free.min.steadystate=4096
|
||
|
||
# the undershot below the steady-state of minimum disk free as absolute size (MB)
|
||
resource.disk.free.min.undershot=2048
|
||
|
||
# the target steady-state of maximum disk space for YaCy (MB)
|
||
resource.disk.used.max.steadystate=2097152
|
||
|
||
# the overshot above the steady-state of disk space for YaCy (absolute) (MB)
|
||
resource.disk.used.max.overshot=4194304
|
||
|
||
# minimum memory to accept dht-in (MiB)
|
||
memory.acceptDHTabove = 50
|
||
memory.disabledDHT = false
|
||
|
||
# whether using standard memory strategy - or try generation memory strategy
|
||
memory.standardStrategy = true
|
||
|
||
# content integration settings
|
||
content.phpbb3.urlstub = http://<mydomain>/
|
||
content.phpbb3.dbtype = mysql
|
||
content.phpbb3.dbhost = localhost
|
||
content.phpbb3.dbport = 3306
|
||
content.phpbb3.dbname = forum
|
||
content.phpbb3.tableprefix = phpbb_
|
||
content.phpbb3.dbuser = notroot
|
||
content.phpbb3.dbpw = joshua
|
||
content.phpbb3.ppf = 1000
|
||
content.phpbb3.dumpfile =
|
||
|
||
# search engine teaser: an about box in search results
|
||
# this is only shown, if the about.body is filled
|
||
about.headline=Please support YaCy!
|
||
about.body=<iframe src="env/donate.html" width="100%" height="90" border="0" frameborder="0" scrolling="no" hspace="0" vspace="0" name="donate"></iframe><br/><div style="padding:8px;">If you run a YaCy server, feel free to replace our donation plea with your own support message, use the <a href="ConfigPortal_p.html">Portal Configuration</a> servlet.</div>
|
||
|
||
donation.iframesource=http://yacy.net/include/donate.html
|
||
donation.iframetarget=env/donate.html
|
||
|
||
# search heuristics
|
||
heuristic.site = false
|
||
heuristic.searchresults = false
|
||
heuristic.searchresults.crawlglobal = false
|
||
heuristic.opensearch = false
|
||
|
||
# colours for generic design
|
||
# white
|
||
color_background = #FFFFFF
|
||
|
||
# dark blue/grey
|
||
color_text = #18294A
|
||
|
||
# success/green
|
||
color_legend = #5cb85c
|
||
|
||
# brand/blue
|
||
color_tableheader = #84B3DE
|
||
|
||
# dark/light grey (for tables)
|
||
color_tableitem = #dddddd
|
||
color_tableitem2 = #eeeeee
|
||
|
||
# light red
|
||
color_tablebottom = #F2DEDE
|
||
|
||
color_borderline = #888888
|
||
color_signbad = #990000
|
||
color_signgood = #009900
|
||
color_signother = #000099
|
||
|
||
# dark blue
|
||
color_searchheadline = #2145ca
|
||
|
||
# green / success/3*2
|
||
color_searchurl = #1c65ba
|
||
color_searchurlhover = #1c65ba
|
||
|
||
|
||
# federated index storage and federated search functionality
|
||
# federated search means that other search engines may be used together with the built-in indexing.
|
||
# each federated search may be able to be used as remote indexing service and/or as remote search service.
|
||
# a typical use case for a federated search is a concurrent search from opensearch sources.
|
||
# a typical use case for a remote indexing service is a remote solr index. YaCy supports remote solr indexes.
|
||
|
||
# solr indexes can be filled if enabled is set to true
|
||
# the remote index scheme is the same as produced by the SolrCell; see http://wiki.apache.org/solr/ExtractingRequestHandler
|
||
# because this default scheme is used the default example scheme can be used as solr configuration
|
||
# to use this, do the following:
|
||
# - set federated.service.solr.indexing.enabled = true
|
||
# - download solr from http://www.apache.org/dyn/closer.cgi/lucene/solr/
|
||
# - extract the solr (3.1) package, 'cd example' and start solr with 'java -jar start.jar'
|
||
# - start yacy and then start a crawler. The crawler will fill both, YaCy and solr indexes.
|
||
# - to check what's in solr after indexing, open http://localhost:8983/solr/admin/
|
||
federated.service.solr.indexing.enabled = false
|
||
federated.service.solr.indexing.url = http://127.0.0.1:8983/solr
|
||
federated.service.solr.indexing.sharding = MODULO_HOST_MD5
|
||
# the lazy attribute causes that fields containing "" or 0 are not added and not written
|
||
federated.service.solr.indexing.lazy = true
|
||
federated.service.solr.indexing.timeout = 60000
|
||
federated.service.solr.indexing.writeEnabled = true
|
||
|
||
# temporary definition of backend services to use.
|
||
# After the migration a rwi+solr combination is used, the solr contains the content of the previously used metadata-db.
|
||
# To get a handle for a migration, these values are defined as temporary, if the migration starts the values are renamed
|
||
# and defined with different default values.
|
||
# The citation service is used for ranking; this is a reverse linking index. It should be on before and after the migration.
|
||
# It can be switched off if only a remote solr index is used.
|
||
core.service.fulltext = true
|
||
core.service.rwi.tmp = true
|
||
core.service.citation.tmp = true
|
||
core.service.webgraph.tmp = false
|
||
|
||
# Augmentation settings
|
||
parserAugmentation = false
|
||
parserAugmentation.RDFa = false
|
||
|
||
# Content control settings
|
||
contentcontrol.enabled = false
|
||
contentcontrol.bookmarklist = contentcontrol
|
||
contentcontrol.mandatoryfilterlist = yacy
|
||
contentcontrol.smwimport.enabled = false
|
||
contentcontrol.smwimport.baseurl =
|
||
contentcontrol.smwimport.purgelistoninit = true
|
||
contentcontrol.smwimport.targetlist = contentcontrol
|
||
contentcontrol.smwimport.defaultcategory = yacy
|
||
|
||
# host browser settings
|
||
# Allow the administrator to stack URLs to the local crawl queue from the host browser page, automatically (when a path is unknown) or manually through a "load and index" link
|
||
browser.autoload = false
|
||
# Allow everyone to stack URLs to the local crawl queue. "browser.autoload" has also to be set to true to enable automatic loading on an unknown path.
|
||
browser.load4everyone = false
|
||
|
||
# greedy learning: fast information acquisition heuristic for new peers
|
||
# to make greedy learning work, it must be enabled in the network definition
|
||
# the user may switch it off at any time, but if the automatic learning limit is reached
|
||
# then the active flag is set to false automatically and this will switch to that state
|
||
# automatically by the cleanup process each time if the user switches it on again.
|
||
# While the switch is on, it will cause that the user-submitted search will be done along
|
||
# with some heuristics like: loading linked documents and adding a twitter search.
|
||
# When the learning mode is finished, the user may switch on individual heuristics by himself.
|
||
greedylearning.active = true
|
||
|
||
# postprocessing steering
|
||
postprocessing.maximum_load = 2.5
|
||
postprocessing.minimum_ram = 536870912
|
||
postprocessing.partialUpdate = true
|
||
|
||
# Custom user agents for 'allip' networks:
|
||
# This user agent is only available if the network is set to 'allip' (which is a non-limited domain 'network'
|
||
# without p2p options). Changing this will NOT change the default YaCy user agent, it will only provide an
|
||
# agent which is available at crawl start within 'allip'. The userAgent.name is the identifier for the
|
||
# robots.txt file which YaCy always obeys for the given name or a wildcard for robot types.
|
||
# If any part of this custom user agent name or string includes the phrase 'yacy', it will be IGNORED
|
||
# to prevent fraud, DoS or bad behavior in the name of YaCy.
|
||
# To use this user agent option, you must define completely different names and strings
|
||
# and remove the given example here, which will be ignored by default.
|
||
crawler.userAgent.name = yacybot
|
||
crawler.userAgent.string = yacybot ($$SYSTEM$$) http://yacy.net/bot.html
|
||
crawler.userAgent.minimumdelta = 500
|
||
crawler.userAgent.clienttimeout = 10000
|
||
|
||
# experiments with timeout requests
|
||
timeoutrequests = true
|
||
|
||
# interface decorations
|
||
decoration.audio = false
|
||
decoration.grafics.linkstructure = true
|
||
decoration.hostanalysis = false
|
||
decoration.simpleheadernavbar = navbar-default
|
||
|