mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
16baa7ad24
- Download a Wikipedia dump, e.g. dewiki-20090311-pages-articles.xml.bz2, from http://download.wikimedia.org/dewiki/20090311/ - Move dewiki-20090311-pages-articles.xml.bz2 to DATA/HTCACHE/ - Start the conversion: open a command shell, change to the YaCy home directory and execute: java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/ This writes a series of files into DATA/SURROGATES/in. If YaCy is running (it may run concurrently with the conversion), it fetches all new dumps from the surrogate-in directory. The export process is transaction-safe, which means YaCy will not start reading a dump until that dump has been completely written. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5851 6c8d7289-2bf4-0310-a012-ef5d649a1542 |
||
---|---|---|
.. | ||
bbCode.java | ||
bitfield.java | ||
consoleInterface.java | ||
crypt.java | ||
cryptbig.java | ||
CryptoLib.java | ||
diskUsage.java | ||
disorderHeap.java | ||
disorderSet.java | ||
enumerateFiles.java | ||
Formatter.java | ||
gzip.java | ||
iso639.java | ||
ListDirs.java | ||
loaderCore.java | ||
loaderProcess.java | ||
loaderThreads.java | ||
mediawikiIndex.java | ||
nxTools.java | ||
PKCS12Tool.java | ||
Punycode.java | ||
SignatureOutputStream.java | ||
tarTools.java |