mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-21 00:00:13 +02:00
16baa7ad24
- download a Wikipedia dump, e.g. dewiki-20090311-pages-articles.xml.bz2 from http://download.wikimedia.org/dewiki/20090311/ - move dewiki-20090311-pages-articles.xml.bz2 to DATA/HTCACHE/ - start the conversion: open a command shell, move to the YaCy home directory and execute java -Xmx2000m -cp classes:lib/bzip2.jar de.anomic.tools.mediawikiIndex -convert DATA/HTCACHE/dewiki-20090311-pages-articles.xml.bz2 DATA/SURROGATES/in/ http://de.wikipedia.org/wiki/ This generates a series of files in DATA/SURROGATES/in. If YaCy is running (it may run concurrently), it fetches all new dumps in the surrogate-in directory. The export process is transaction-safe, which means YaCy will not start reading a dump until the dump is completely finished. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5851 6c8d7289-2bf4-0310-a012-ef5d649a1542 |
||
---|---|---|
.. | ||
AcceptEverythingSSLProtcolSocketFactory.java | ||
AcceptEverythingTrustManager.java | ||
DefaultCharsetFilePart.java | ||
DefaultCharsetStringPart.java | ||
EasySSLProtocolSocketFactory.java | ||
EasyX509TrustManager.java | ||
httpChunkedInputStream.java | ||
httpChunkedOutputStream.java | ||
httpClient.java | ||
HttpConnectionInfo.java | ||
httpd.java | ||
httpdAlternativeDomainNames.java | ||
httpdBoundedSizeOutputStream.java | ||
httpdByteCountInputStream.java | ||
httpdByteCountOutputStream.java | ||
httpdFileHandler.java | ||
httpdLimitExceededException.java | ||
httpdProxyCacheEntry.java | ||
httpdProxyHandler.java | ||
httpdRobotsTxtConfig.java | ||
httpHeader.java | ||
httpRemoteProxyConfig.java | ||
httpRequestHeader.java | ||
httpResponse.java | ||
httpResponseHeader.java | ||
httpSSI.java | ||
httpTemplate.java | ||
MultiOutputStream.java | ||
ProxyLogFormatter.java |