2009-10-18 03:38:07 +02:00
package net.yacy ;
2005-09-20 17:36:22 +02:00
// yacy.java
// -----------------------
2008-07-20 19:14:51 +02:00
// (C) by Michael Peter Christen; mc@yacy.net
2005-09-20 17:36:22 +02:00
// first published on http://www.yacy.net
// Frankfurt, Germany, 2004, 2005
2005-04-07 21:19:42 +02:00
//
2005-09-20 17:36:22 +02:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
2005-09-20 17:36:22 +02:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
2005-04-07 21:19:42 +02:00
//
2005-09-20 17:36:22 +02:00
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
2005-04-07 21:19:42 +02:00
//
2005-09-20 17:36:22 +02:00
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2006-06-01 01:31:46 +02:00
2009-10-18 03:38:07 +02:00
2008-04-11 00:47:05 +02:00
import java.io.BufferedInputStream ;
2005-12-07 02:40:52 +01:00
import java.io.BufferedOutputStream ;
2005-05-05 07:32:19 +02:00
import java.io.BufferedReader ;
import java.io.BufferedWriter ;
import java.io.ByteArrayOutputStream ;
import java.io.File ;
import java.io.FileInputStream ;
import java.io.FileNotFoundException ;
2005-12-07 02:40:52 +01:00
import java.io.FileOutputStream ;
2005-05-05 07:32:19 +02:00
import java.io.FileWriter ;
import java.io.IOException ;
import java.io.InputStreamReader ;
import java.io.PrintWriter ;
import java.util.Iterator ;
2006-01-30 09:28:22 +01:00
import java.util.Map ;
2005-05-05 07:32:19 +02:00
import java.util.Properties ;
2009-04-16 17:29:00 +02:00
import java.util.TreeMap ;
2005-05-05 07:32:19 +02:00
import java.util.TreeSet ;
2009-10-20 00:34:44 +02:00
import java.util.concurrent.Semaphore ;
2005-12-17 16:43:13 +01:00
import java.util.zip.ZipEntry ;
import java.util.zip.ZipOutputStream ;
2005-09-27 18:28:55 +02:00
2009-10-10 02:43:25 +02:00
import net.yacy.kelondro.blob.MapDataMining ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.meta.DigestURI ;
import net.yacy.kelondro.data.meta.URIMetadataRow ;
import net.yacy.kelondro.data.word.Word ;
import net.yacy.kelondro.data.word.WordReference ;
2009-10-10 01:32:08 +02:00
import net.yacy.kelondro.index.RowCollection ;
2009-10-10 01:13:30 +02:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Base64Order ;
2009-10-10 02:39:15 +02:00
import net.yacy.kelondro.rwi.Reference ;
import net.yacy.kelondro.rwi.ReferenceContainer ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.DateFormatter ;
import net.yacy.kelondro.util.FileUtils ;
import net.yacy.kelondro.util.Formatter ;
import net.yacy.kelondro.util.MemoryControl ;
import net.yacy.kelondro.util.ScoreCluster ;
2009-10-20 00:34:44 +02:00
import net.yacy.kelondro.util.OS ;
2009-10-10 01:13:30 +02:00
2008-04-07 15:12:58 +02:00
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager ;
2005-07-06 16:48:41 +02:00
import de.anomic.data.translator ;
2009-07-19 22:37:44 +02:00
import de.anomic.http.client.Client ;
import de.anomic.http.server.HTTPDemon ;
2009-10-11 02:24:42 +02:00
import de.anomic.http.server.RequestHeader ;
import de.anomic.http.server.ResponseContainer ;
2009-10-11 02:12:19 +02:00
import de.anomic.search.MetadataRepository ;
import de.anomic.search.Segment ;
2009-07-19 22:37:44 +02:00
import de.anomic.search.Switchboard ;
import de.anomic.search.SwitchboardConstants ;
2005-05-05 07:32:19 +02:00
import de.anomic.server.serverCore ;
import de.anomic.tools.enumerateFiles ;
2005-11-11 00:48:20 +01:00
import de.anomic.yacy.yacyClient ;
2006-01-30 09:28:22 +01:00
import de.anomic.yacy.yacySeedDB ;
2009-04-23 15:18:59 +02:00
import de.anomic.yacy.Tray ;
2009-07-11 19:03:22 +02:00
import de.anomic.yacy.yacyBuildProperties ;
2009-05-02 14:12:22 +02:00
import de.anomic.yacy.yacyRelease ;
2007-04-27 11:23:44 +02:00
import de.anomic.yacy.yacyVersion ;
2005-04-07 21:19:42 +02:00
2005-08-02 21:40:29 +02:00
/**
 * This is the main class of YaCy. Several threads are started from here:
 * <ul>
 * <li>one single instance of the plasmaSwitchboard is generated, which itself
 * starts a thread with a plasmaHTMLCache object. This object simply counts
 * file sizes in the cache and terminates them. It also generates a
 * plasmaCrawlerLoader object, which may itself start some more httpc-calling
 * threads to load web pages. They terminate automatically when a page has
 * loaded.
 * <li>one serverCore-thread is started, which implements a multi-threaded
 * server. The process may start itself many more processes that handle
 * connections.
 * <li>finally, all idle-dependent processes are written in a queue in
 * plasmaSwitchboard which are worked off inside an idle-sensitive loop of the
 * main process. (here)
 * </ul>
 *
 * On termination, the following must be done:
 * <ul>
 * <li>stop feeding of the crawling process because it otherwise fills the
 * indexing queue.
 * <li>say goodbye to connected peers and disable new connections. Don't wait for
 * success.
 * <li>first terminate the serverCore thread. This prevents that new cache
 * objects are queued.
 * <li>wait that the plasmaHTMLCache terminates (it should be normal that this
 * process already has terminated).
 * <li>then wait for termination of all loader process of the
 * plasmaCrawlerLoader.
 * <li>work off the indexing and cache storage queue. These values are inside a
 * RAM cache and would be lost otherwise.
 * <li>write all settings.
 * <li>terminate.
 * </ul>
 */
2005-09-20 17:36:22 +02:00
public final class yacy {
2007-07-11 01:56:25 +02:00
2005-04-07 21:19:42 +02:00
// static objects
2009-07-11 19:03:22 +02:00
public static final String vString = yacyBuildProperties . getVersion ( ) ;
2007-07-11 01:56:25 +02:00
public static double version = 0 . 1 ;
2008-05-03 11:06:00 +02:00
public static boolean pro = false ;
2007-06-28 16:52:26 +02:00
2009-07-11 19:03:22 +02:00
public static final String vDATE = yacyBuildProperties . getBuildDate ( ) ;
2007-07-11 01:56:25 +02:00
public static final String copyright = " [ YaCy v " + vString + " , build " + vDATE + " by Michael Christen / www.yacy.net ] " ;
public static final String hline = " ------------------------------------------------------------------------------- " ;
2005-10-05 18:35:05 +02:00
2007-05-06 10:22:18 +02:00
/ * *
2009-07-19 22:37:44 +02:00
* a reference to the { @link Switchboard } created by the
2007-05-06 10:22:18 +02:00
* { @link yacy # startup ( String , long , long ) } method .
* /
2009-07-19 22:37:44 +02:00
private static Switchboard sb = null ;
2007-05-06 10:22:18 +02:00
/ * *
* Semaphore needed by { @link yacy # setUpdaterCallback ( serverUpdaterCallback ) } to block
* until the { @link plasmaSwitchboard } object was created .
* /
2008-11-22 00:21:33 +01:00
//private static serverSemaphore sbSync = new serverSemaphore(0);
2007-05-06 10:22:18 +02:00
/ * *
* Semaphore needed by { @link yacy # waitForFinishedStartup ( ) } to block
* until startup has finished
* /
2009-10-20 00:34:44 +02:00
private static Semaphore startupFinishedSync = new Semaphore ( 0 ) ;
2005-08-02 21:40:29 +02:00
/ * *
* Starts up the whole application . Sets up all datastructures and starts
* the main threads .
*
* @param homePath Root - path where all information is to be found .
2005-09-20 12:10:34 +02:00
* @param startupFree free memory at startup time , to be used later for statistics
2005-08-02 21:40:29 +02:00
* /
2008-08-02 14:12:04 +02:00
private static void startup ( final File homePath , final long startupMemFree , final long startupMemTotal ) {
2005-05-11 11:44:36 +02:00
try {
// start up
System . out . println ( copyright ) ;
System . out . println ( hline ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// check java version
try {
2008-05-03 12:16:04 +02:00
" a " . codePointAt ( 0 ) ; // needs at least Java 1.5
2008-08-02 14:12:04 +02:00
} catch ( final NoSuchMethodError e ) {
2008-05-03 12:16:04 +02:00
System . err . println ( " STARTUP: Java Version too low. You need at least Java 1.5 to run YaCy " ) ;
2005-12-05 01:17:12 +01:00
Thread . sleep ( 3000 ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
}
2005-09-27 18:28:55 +02:00
2006-10-11 20:27:38 +02:00
// ensure that there is a DATA directory, if not, create one and if that fails warn and die
2008-08-06 21:43:12 +02:00
File f = homePath ;
mkdirsIfNeseccary ( f ) ;
f = new File ( homePath , " DATA/ " ) ;
mkdirsIfNeseccary ( f ) ;
2006-10-11 20:27:38 +02:00
if ( ! ( f . exists ( ) ) ) {
2006-12-17 19:06:39 +01:00
System . err . println ( " Error creating DATA-directory in " + homePath . toString ( ) + " . Please check your write-permission for this folder. YaCy will now terminate. " ) ;
System . exit ( - 1 ) ;
2006-10-11 20:27:38 +02:00
}
2005-09-27 18:28:55 +02:00
2005-06-09 11:46:43 +02:00
// setting up logging
2008-06-15 00:51:47 +02:00
f = new File ( homePath , " DATA/LOG/ " ) ;
2008-08-06 21:43:12 +02:00
mkdirsIfNeseccary ( f ) ;
2008-06-15 00:51:47 +02:00
f = new File ( homePath , " DATA/LOG/yacy.logging " ) ;
2009-10-10 02:39:15 +02:00
//if (!f.exists()) try {
2009-01-31 02:06:56 +01:00
FileUtils . copy ( new File ( homePath , " yacy.logging " ) , f ) ;
2009-10-10 02:39:15 +02:00
//} catch (final IOException e){
// System.out.println("could not copy yacy.logging");
//}
2005-09-27 18:28:55 +02:00
try {
2009-01-31 00:33:47 +01:00
Log . configureLogging ( homePath , new File ( homePath , " DATA/LOG/yacy.logging " ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2005-06-13 14:01:58 +02:00
System . out . println ( " could not find logging properties in homePath= " + homePath ) ;
2009-11-05 21:28:37 +01:00
Log . logException ( e ) ;
2005-06-13 14:01:58 +02:00
}
2010-01-10 17:10:11 +01:00
Log . logConfig ( " STARTUP " , " YaCy version: " + yacyBuildProperties . getVersion ( ) + " / " + yacyBuildProperties . getSVNRevision ( ) ) ;
2009-01-31 00:33:47 +01:00
Log . logConfig ( " STARTUP " , " Java version: " + System . getProperty ( " java.version " , " no-java-version " ) ) ;
Log . logConfig ( " STARTUP " , " Operation system: " + System . getProperty ( " os.name " , " unknown " ) ) ;
Log . logConfig ( " STARTUP " , " Application root-path: " + homePath ) ;
Log . logConfig ( " STARTUP " , " Time zone: UTC " + DateFormatter . UTCDiffString ( ) + " ; UTC+0000 is " + System . currentTimeMillis ( ) ) ;
2009-10-20 00:34:44 +02:00
Log . logConfig ( " STARTUP " , " Maximum file system path length: " + OS . maxPathLength ) ;
2007-06-08 14:45:03 +02:00
f = new File ( homePath , " DATA/yacy.running " ) ;
if ( f . exists ( ) ) { // another instance running? VM crash? User will have to care about this
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " WARNING: the file " + f + " exists, this usually means that a YaCy instance is still running " ) ;
2008-08-06 21:43:12 +02:00
delete ( f ) ;
2007-06-08 14:45:03 +02:00
}
2008-08-06 21:43:12 +02:00
if ( ! f . createNewFile ( ) )
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " WARNING: the file " + f + " can not be created! " ) ;
2007-06-13 22:55:48 +02:00
f . deleteOnExit ( ) ;
2007-06-28 16:52:26 +02:00
pro = new File ( homePath , " libx " ) . exists ( ) ;
2008-08-02 14:12:04 +02:00
final String oldconf = " DATA/SETTINGS/httpProxy.conf " . replace ( " / " , File . separator ) ;
final String newconf = " DATA/SETTINGS/yacy.conf " . replace ( " / " , File . separator ) ;
final File oldconffile = new File ( homePath , oldconf ) ;
2008-03-16 23:31:54 +01:00
if ( oldconffile . exists ( ) ) {
2008-08-06 21:43:12 +02:00
final File newconfFile = new File ( homePath , newconf ) ;
if ( ! oldconffile . renameTo ( newconfFile ) )
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " WARNING: the file " + oldconffile + " can not be renamed to " + newconfFile + " ! " ) ;
2008-03-16 23:31:54 +01:00
}
2009-07-19 22:37:44 +02:00
sb = new Switchboard ( homePath , " defaults/yacy.init " . replace ( " / " , File . separator ) , newconf , pro ) ;
2008-11-22 00:21:33 +01:00
//sbSync.V(); // signal that the sb reference was set
2005-10-05 18:35:05 +02:00
2005-09-20 12:10:34 +02:00
// save information about available memory at startup time
2005-09-21 02:12:37 +02:00
sb . setConfig ( " memoryFreeAfterStartup " , startupMemFree ) ;
sb . setConfig ( " memoryTotalAfterStartup " , startupMemTotal ) ;
2005-09-20 12:10:34 +02:00
2005-06-30 00:55:37 +02:00
// hardcoded, forced, temporary value-migration
2005-09-21 02:12:37 +02:00
sb . setConfig ( " htTemplatePath " , " htroot/env/templates " ) ;
2005-08-02 21:40:29 +02:00
2009-07-12 11:38:03 +02:00
int oldRev ;
try {
oldRev = Integer . parseInt ( sb . getConfig ( " svnRevision " , " 0 " ) ) ;
} catch ( NumberFormatException e ) {
oldRev = 0 ;
}
2009-07-11 19:03:22 +02:00
int newRev = Integer . parseInt ( yacyBuildProperties . getSVNRevision ( ) ) ;
2009-07-12 11:38:03 +02:00
sb . setConfig ( " svnRevision " , yacyBuildProperties . getSVNRevision ( ) ) ;
2005-08-02 21:40:29 +02:00
2009-07-11 19:03:22 +02:00
// TODO: remove!
//sb.setConfig("version", Double.toString(version));
//sb.setConfig("vString", yacyVersion.combined2prettyVersion(Double.toString(version)));
//sb.setConfig("vdate", (vDATE.startsWith("@")) ? DateFormatter.formatShortDay() : vDATE);
2008-02-01 00:40:47 +01:00
sb . setConfig ( " applicationRoot " , homePath . toString ( ) ) ;
2007-04-27 11:23:44 +02:00
yacyVersion . latestRelease = version ;
2005-08-02 21:40:29 +02:00
2005-05-11 11:44:36 +02:00
// read environment
2009-06-11 12:20:31 +02:00
final int timeout = Math . max ( 5000 , Integer . parseInt ( sb . getConfig ( " httpdTimeout " , " 5000 " ) ) ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// create some directories
2005-09-22 22:25:56 +02:00
final File htRootPath = new File ( homePath , sb . getConfig ( " htRootPath " , " htroot " ) ) ;
2009-07-19 22:37:44 +02:00
final File htDocsPath = sb . getConfigPath ( SwitchboardConstants . HTDOCS_PATH , SwitchboardConstants . HTDOCS_PATH_DEFAULT ) ;
2008-08-06 21:43:12 +02:00
mkdirIfNeseccary ( htDocsPath ) ;
2005-12-05 10:13:13 +01:00
//final File htTemplatePath = new File(homePath, sb.getConfig("htTemplatePath","htdocs"));
2005-08-02 21:40:29 +02:00
2005-08-02 02:16:19 +02:00
// create default notifier picture
2006-02-03 22:21:42 +01:00
//TODO: Use templates instead of copying images ...
2006-02-04 11:50:22 +01:00
if ( ! ( ( new File ( htDocsPath , " notifier.gif " ) ) . exists ( ) ) ) try {
2009-01-31 02:06:56 +01:00
FileUtils . copy ( new File ( htRootPath , " env/grafics/empty.gif " ) ,
2006-02-03 22:21:42 +01:00
new File ( htDocsPath , " notifier.gif " ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) { }
2005-08-02 21:40:29 +02:00
2008-02-18 17:38:06 +01:00
final File htdocsReadme = new File ( htDocsPath , " readme.txt " ) ;
2009-01-31 02:06:56 +01:00
if ( ! ( htdocsReadme . exists ( ) ) ) try { FileUtils . copy ( (
2005-05-11 11:44:36 +02:00
" This is your root directory for individual Web Content \ r \ n " +
" \ r \ n " +
" Please place your html files into the www subdirectory. \ r \ n " +
" The URL of that path is either \ r \ n " +
" http://www.<your-peer-name>.yacy or \ r \ n " +
" http://<your-ip>:<your-port>/www \ r \ n " +
" \ r \ n " +
" Other subdirectories may be created; they map to corresponding sub-domains. \ r \ n " +
" This directory shares it's content with the applications htroot path, so you \ r \ n " +
" may access your yacy search page with \ r \ n " +
" http://<your-peer-name>.yacy/ \ r \ n " +
2008-08-02 14:12:04 +02:00
" \ r \ n " ) . getBytes ( ) , htdocsReadme ) ; } catch ( final IOException e ) {
2005-07-03 14:40:36 +02:00
System . out . println ( " Error creating htdocs readme: " + e . getMessage ( ) ) ;
}
2005-08-02 21:40:29 +02:00
2005-09-20 17:36:22 +02:00
final File wwwDefaultPath = new File ( htDocsPath , " www " ) ;
2008-08-06 21:43:12 +02:00
mkdirIfNeseccary ( wwwDefaultPath ) ;
2005-08-02 21:40:29 +02:00
2005-09-20 17:36:22 +02:00
final File shareDefaultPath = new File ( htDocsPath , " share " ) ;
2008-08-06 21:43:12 +02:00
mkdirIfNeseccary ( shareDefaultPath ) ;
2005-08-02 21:40:29 +02:00
2006-02-12 17:46:43 +01:00
migration . migrate ( sb , oldRev , newRev ) ;
2005-12-06 23:30:15 +01:00
2008-04-22 18:53:53 +02:00
// delete old release files
2008-08-02 14:12:04 +02:00
final int deleteOldDownloadsAfterDays = ( int ) sb . getConfigLong ( " update.deleteOld " , 30 ) ;
2009-05-02 14:12:22 +02:00
yacyRelease . deleteOldDownloads ( sb . releasePath , deleteOldDownloadsAfterDays ) ;
2008-04-22 18:53:53 +02:00
2008-05-04 14:18:00 +02:00
// set user-agent
2008-05-04 19:26:19 +02:00
final String userAgent = " yacy/ " + Double . toString ( version ) + " (www.yacy.net; "
2009-07-19 22:37:44 +02:00
+ Client . getSystemOST ( ) + " ) " ;
Client . setUserAgent ( userAgent ) ;
2008-05-04 14:18:00 +02:00
2005-05-11 11:44:36 +02:00
// start main threads
2006-03-04 12:07:01 +01:00
final String port = sb . getConfig ( " port " , " 8080 " ) ;
2005-05-11 11:44:36 +02:00
try {
2009-07-19 22:37:44 +02:00
final HTTPDemon protocolHandler = new HTTPDemon ( sb ) ;
2005-11-15 16:03:15 +01:00
final serverCore server = new serverCore (
2005-05-11 11:44:36 +02:00
timeout /*control socket timeout in milliseconds*/ ,
true /* block attacks (wrong protocol) */ ,
protocolHandler /*command class*/ ,
2005-09-21 02:12:37 +02:00
sb ,
2005-06-10 11:19:24 +02:00
30000 /*command max length incl. GET args*/ ) ;
2005-05-11 11:44:36 +02:00
server . setName ( " httpd: " + port ) ;
2005-07-03 14:40:36 +02:00
server . setPriority ( Thread . MAX_PRIORITY ) ;
2005-10-31 11:46:13 +01:00
server . setObeyIntermission ( false ) ;
2005-05-11 11:44:36 +02:00
if ( server = = null ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " Failed to start server. Probably port " + port + " already in use. " ) ;
2005-05-11 11:44:36 +02:00
} else {
// first start the server
2005-09-21 02:12:37 +02:00
sb . deployThread ( " 10_httpd " , " HTTPD Server/Proxy " , " the HTTPD, used as web server and proxy " , null , server , 0 , 0 , 0 , 0 ) ;
2005-05-11 11:44:36 +02:00
//server.start();
2005-08-02 21:40:29 +02:00
2005-05-11 11:44:36 +02:00
// open the browser window
2009-07-19 22:37:44 +02:00
final boolean browserPopUpTrigger = sb . getConfig ( SwitchboardConstants . BROWSER_POP_UP_TRIGGER , " true " ) . equals ( " true " ) ;
2005-05-11 11:44:36 +02:00
if ( browserPopUpTrigger ) {
2009-07-19 22:37:44 +02:00
final String browserPopUpPage = sb . getConfig ( SwitchboardConstants . BROWSER_POP_UP_PAGE , " ConfigBasic.html " ) ;
2008-06-02 23:49:59 +02:00
//boolean properPW = (sb.getConfig("adminAccount", "").length() == 0) && (sb.getConfig(httpd.ADMIN_ACCOUNT_B64MD5, "").length() > 0);
//if (!properPW) browserPopUpPage = "ConfigBasic.html";
2009-07-19 22:37:44 +02:00
final String browserPopUpApplication = sb . getConfig ( SwitchboardConstants . BROWSER_POP_UP_APPLICATION , " firefox " ) ;
2009-10-20 00:34:44 +02:00
OS . openBrowser ( ( server . withSSL ( ) ? " https " : " http " ) + " ://localhost: " + serverCore . getPortNr ( port ) + " / " + browserPopUpPage , browserPopUpApplication ) ;
2005-05-11 11:44:36 +02:00
}
2008-07-18 16:17:52 +02:00
// unlock yacyTray browser popup
2009-04-23 15:18:59 +02:00
Tray . lockBrowserPopup = false ;
2005-08-02 21:40:29 +02:00
2007-07-04 12:32:30 +02:00
// Copy the shipped locales into DATA, existing files are overwritten
2007-11-04 11:36:25 +01:00
final File locale_work = sb . getConfigPath ( " locale.work " , " DATA/LOCALE/locales " ) ;
final File locale_source = sb . getConfigPath ( " locale.source " , " locales " ) ;
2005-05-27 10:36:07 +02:00
try {
2007-07-04 12:32:30 +02:00
final File [ ] locale_source_files = locale_source . listFiles ( ) ;
2008-08-06 21:43:12 +02:00
mkdirsIfNeseccary ( locale_work ) ;
2007-07-04 12:32:30 +02:00
File target ;
for ( int i = 0 ; i < locale_source_files . length ; i + + ) {
target = new File ( locale_work , locale_source_files [ i ] . getName ( ) ) ;
if ( locale_source_files [ i ] . getName ( ) . endsWith ( " .lng " ) ) {
2008-08-06 21:43:12 +02:00
if ( target . exists ( ) ) delete ( target ) ;
2009-01-31 02:06:56 +01:00
FileUtils . copy ( locale_source_files [ i ] , target ) ;
2007-07-04 12:32:30 +02:00
}
2005-05-27 10:36:07 +02:00
}
2009-01-31 00:33:47 +01:00
Log . logInfo ( " STARTUP " , " Copied the default locales to " + locale_work . toString ( ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NullPointerException e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " Nullpointer Exception while copying the default Locales " ) ;
2005-05-27 10:36:07 +02:00
}
2005-08-02 21:40:29 +02:00
2005-07-03 14:40:36 +02:00
//regenerate Locales from Translationlist, if needed
2007-07-04 12:32:30 +02:00
final String lang = sb . getConfig ( " locale.language " , " " ) ;
if ( ! lang . equals ( " " ) & & ! lang . equals ( " default " ) ) { //locale is used
2005-07-03 14:40:36 +02:00
String currentRev = " " ;
try {
2007-11-04 11:36:25 +01:00
final BufferedReader br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( new File ( sb . getConfigPath ( " locale.translated_html " , " DATA/LOCALE/htroot " ) , lang + " /version " ) ) ) ) ;
2005-07-03 14:40:36 +02:00
currentRev = br . readLine ( ) ;
br . close ( ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2005-07-03 14:40:36 +02:00
//Error
}
2005-08-02 21:40:29 +02:00
2007-07-04 12:32:30 +02:00
if ( ! currentRev . equals ( sb . getConfig ( " svnRevision " , " " ) ) ) try { //is this another version?!
final File sourceDir = new File ( sb . getConfig ( " htRootPath " , " htroot " ) ) ;
2007-11-04 11:36:25 +01:00
final File destDir = new File ( sb . getConfigPath ( " locale.translated_html " , " DATA/LOCALE/htroot " ) , lang ) ;
2007-07-04 12:32:30 +02:00
if ( translator . translateFilesRecursive ( sourceDir , destDir , new File ( locale_work , lang + " .lng " ) , " html,template,inc " , " locale " ) ) { //translate it
//write the new Versionnumber
final BufferedWriter bw = new BufferedWriter ( new PrintWriter ( new FileWriter ( new File ( destDir , " version " ) ) ) ) ;
bw . write ( sb . getConfig ( " svnRevision " , " Error getting Version " ) ) ;
bw . close ( ) ;
2005-07-03 14:40:36 +02:00
}
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) { }
2005-07-03 14:40:36 +02:00
}
2007-10-16 04:12:31 +02:00
// initialize number formatter with this locale
2009-01-31 02:06:56 +01:00
Formatter . setLocale ( lang ) ;
2007-10-16 04:12:31 +02:00
2005-05-11 11:44:36 +02:00
// registering shutdown hook
2009-01-31 00:33:47 +01:00
Log . logConfig ( " STARTUP " , " Registering Shutdown Hook " ) ;
2005-09-20 17:36:22 +02:00
final Runtime run = Runtime . getRuntime ( ) ;
2005-09-21 02:12:37 +02:00
run . addShutdownHook ( new shutdownHookThread ( Thread . currentThread ( ) , sb ) ) ;
2005-08-02 21:40:29 +02:00
2005-09-20 12:10:34 +02:00
// save information about available memory after all initializations
2006-01-31 00:07:20 +01:00
//try {
2009-01-30 16:33:00 +01:00
sb . setConfig ( " memoryFreeAfterInitBGC " , MemoryControl . free ( ) ) ;
sb . setConfig ( " memoryTotalAfterInitBGC " , MemoryControl . total ( ) ) ;
2006-01-30 13:42:06 +01:00
System . gc ( ) ;
2009-01-30 16:33:00 +01:00
sb . setConfig ( " memoryFreeAfterInitAGC " , MemoryControl . free ( ) ) ;
sb . setConfig ( " memoryTotalAfterInitAGC " , MemoryControl . total ( ) ) ;
2006-01-31 00:07:20 +01:00
//} catch (ConcurrentModificationException e) {}
2005-09-21 14:21:01 +02:00
2007-05-06 10:22:18 +02:00
// signal finished startup
2009-10-20 00:34:44 +02:00
startupFinishedSync . release ( ) ;
2007-05-06 10:22:18 +02:00
2005-05-11 11:44:36 +02:00
// wait for server shutdown
try {
2005-09-21 02:12:37 +02:00
sb . waitForShutdown ( ) ;
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " MAIN CONTROL LOOP " , " PANIC: " + e . getMessage ( ) , e ) ;
2005-04-07 21:19:42 +02:00
}
// shut down
2009-01-30 16:33:00 +01:00
if ( RowCollection . sortingthreadexecutor ! = null ) RowCollection . sortingthreadexecutor . shutdown ( ) ;
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " caught termination signal " ) ;
2005-04-07 21:19:42 +02:00
server . terminate ( false ) ;
2005-05-11 11:44:36 +02:00
server . interrupt ( ) ;
2008-04-07 15:12:58 +02:00
server . close ( ) ;
2005-05-11 11:44:36 +02:00
if ( server . isAlive ( ) ) try {
2008-04-05 15:17:16 +02:00
// TODO only send request, don't read response (cause server is already down resulting in error)
2009-10-11 02:12:19 +02:00
final DigestURI u = new DigestURI ( ( server . withSSL ( ) ? " https " : " http " ) + " ://localhost: " + serverCore . getPortNr ( port ) , null ) ;
2009-07-19 22:37:44 +02:00
Client . wget ( u . toString ( ) , null , 10000 ) ; // kick server
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " sent termination signal to server socket " ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException ee ) {
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " termination signal to server socket missed (server shutdown, ok) " ) ;
2005-04-07 21:19:42 +02:00
}
2009-07-19 22:37:44 +02:00
Client . closeAllConnections ( ) ;
2008-04-07 15:12:58 +02:00
MultiThreadedHttpConnectionManager . shutdownAll ( ) ;
2005-04-07 21:19:42 +02:00
// idle until the processes are down
2008-04-07 15:12:58 +02:00
if ( server . isAlive ( ) ) {
2008-11-15 00:36:33 +01:00
//Thread.sleep(2000); // wait a while
2008-04-07 15:12:58 +02:00
server . interrupt ( ) ;
MultiThreadedHttpConnectionManager . shutdownAll ( ) ;
2005-04-07 21:19:42 +02:00
}
2009-07-15 16:15:51 +02:00
MultiThreadedHttpConnectionManager . shutdownAll ( ) ;
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " server has terminated " ) ;
2005-09-21 02:12:37 +02:00
sb . close ( ) ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " Unexpected Error: " + e . getClass ( ) . getName ( ) , e ) ;
2005-05-11 11:44:36 +02:00
//System.exit(1);
}
2008-08-02 14:12:04 +02:00
} catch ( final Exception ee ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " FATAL ERROR: " + ee . getMessage ( ) , ee ) ;
2007-05-06 10:22:18 +02:00
} finally {
2009-10-20 00:34:44 +02:00
startupFinishedSync . release ( ) ;
2005-05-11 11:44:36 +02:00
}
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " goodbye. (this is the last line) " ) ;
2009-06-15 23:39:24 +02:00
Log . shutdown ( ) ;
2008-09-04 23:28:00 +02:00
try {
System . exit ( 0 ) ;
} catch ( Exception e ) { } // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790)
2005-04-07 21:19:42 +02:00
}
2005-08-02 21:40:29 +02:00
2008-08-06 21:43:12 +02:00
/ * *
* @param f
* /
private static void delete ( File f ) {
if ( ! f . delete ( ) )
2009-01-31 00:33:47 +01:00
Log . logSevere ( " STARTUP " , " WARNING: the file " + f + " can not be deleted! " ) ;
2008-08-06 21:43:12 +02:00
}
/ * *
* @see File # mkdir ( )
* @param path
* /
private static void mkdirIfNeseccary ( final File path ) {
if ( ! ( path . exists ( ) ) )
if ( ! path . mkdir ( ) )
2009-01-31 00:33:47 +01:00
Log . logWarning ( " STARTUP " , " could not create directory " + path . toString ( ) ) ;
2008-08-06 21:43:12 +02:00
}
/ * *
* @see File # mkdirs ( )
* @param path
* /
private static void mkdirsIfNeseccary ( final File path ) {
if ( ! ( path . exists ( ) ) )
if ( ! path . mkdirs ( ) )
2009-01-31 00:33:47 +01:00
Log . logWarning ( " STARTUP " , " could not create directories " + path . toString ( ) ) ;
2008-08-06 21:43:12 +02:00
}
/ * *
2005-08-02 21:40:29 +02:00
* Loads the configuration from the data - folder .
* FIXME : Why is this called over and over again from every method , instead
* of setting the configurationdata once for this class in main ?
*
* @param mes Where are we called from , so that the errormessages can be
* more descriptive .
* @param homePath Root - path where all the information is to be found .
* @return Properties read from the configurationfile .
* /
2008-08-02 14:12:04 +02:00
private static Properties configuration ( final String mes , final File homePath ) {
2009-01-31 00:33:47 +01:00
Log . logConfig ( mes , " Application Root Path: " + homePath . toString ( ) ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// read data folder
2008-08-02 14:12:04 +02:00
final File dataFolder = new File ( homePath , " DATA " ) ;
2005-04-07 21:19:42 +02:00
if ( ! ( dataFolder . exists ( ) ) ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( mes , " Application was never started or root path wrong. " ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
}
2005-08-02 21:40:29 +02:00
2008-08-02 14:12:04 +02:00
final Properties config = new Properties ( ) ;
2008-08-06 21:43:12 +02:00
FileInputStream fis = null ;
try {
fis = new FileInputStream ( new File ( homePath , " DATA/SETTINGS/yacy.conf " ) ) ;
config . load ( fis ) ;
2008-08-02 14:12:04 +02:00
} catch ( final FileNotFoundException e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( mes , " could not find configuration file. " ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( mes , " could not read configuration file. " ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
2008-08-06 21:43:12 +02:00
} finally {
if ( fis ! = null ) {
try {
fis . close ( ) ;
} catch ( IOException e ) {
2009-11-05 21:28:37 +01:00
Log . logException ( e ) ;
2008-08-06 21:43:12 +02:00
}
}
2005-04-07 21:19:42 +02:00
}
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
return config ;
}
2007-04-29 17:56:45 +02:00
2009-12-08 15:25:51 +01:00
public static void shutdown ( String reason ) {
2007-04-29 17:56:45 +02:00
if ( sb ! = null ) {
// YaCy is running in the same runtime. we can shutdown via interrupt
2009-12-08 15:25:51 +01:00
sb . terminate ( reason ) ;
2007-04-29 17:56:45 +02:00
} else {
2008-08-02 14:12:04 +02:00
final File applicationRoot = new File ( System . getProperty ( " user.dir " ) . replace ( '\\' , '/' ) ) ;
2007-04-29 17:56:45 +02:00
shutdown ( applicationRoot ) ;
}
2006-02-01 12:03:37 +01:00
}
2005-08-02 21:40:29 +02:00
/ * *
2006-05-26 14:18:12 +02:00
* Call the shutdown - page of YaCy to tell it to shut down . This method is
2005-08-02 21:40:29 +02:00
* called if you start yacy with the argument - shutdown .
*
* @param homePath Root - path where all the information is to be found .
* /
2009-06-11 13:31:26 +02:00
public static void shutdown ( final File homePath ) {
2005-04-07 21:19:42 +02:00
// start up
System . out . println ( copyright ) ;
System . out . println ( hline ) ;
2009-06-11 13:31:26 +02:00
submitURL ( homePath , " Steering.html?shutdown= " , " Terminate YaCy " ) ;
}
2005-08-02 21:40:29 +02:00
2009-06-11 13:31:26 +02:00
/**
 * Calls the update page of YaCy with the auto-update parameter, after
 * printing the banner.
 *
 * @param homePath Root-path where all the information is to be found.
 */
public static void update(final File homePath) {
    // print the banner first
    System.out.println(copyright);
    System.out.println(hline);

    final String updatePage = " ConfigUpdate_p.html?autoUpdate= ";
    final String description = " Update YaCy to most recent version ";
    submitURL(homePath, updatePage, description);
}
private static void submitURL ( final File homePath , String path , String processdescription ) {
final Properties config = configuration ( " COMMAND-STEERING " , homePath ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// read port
2008-08-02 14:12:04 +02:00
final int port = serverCore . getPortNr ( config . getProperty ( " port " , " 8080 " ) ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// read password
2009-07-19 22:37:44 +02:00
String encodedPassword = ( String ) config . get ( HTTPDemon . ADMIN_ACCOUNT_B64MD5 ) ;
2005-04-07 21:19:42 +02:00
if ( encodedPassword = = null ) encodedPassword = " " ; // not defined
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// send 'wget' to web interface
2009-07-19 22:37:44 +02:00
final RequestHeader requestHeader = new RequestHeader ( ) ;
requestHeader . put ( RequestHeader . AUTHORIZATION , " realm= " + encodedPassword ) ; // for http-authentify
final Client con = new Client ( 10000 , requestHeader ) ;
ResponseContainer res = null ;
2005-04-07 21:19:42 +02:00
try {
2009-06-11 13:31:26 +02:00
res = con . GET ( " http://localhost: " + port + " / " + path ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// read response
2008-04-05 15:17:16 +02:00
if ( res . getStatusLine ( ) . startsWith ( " 2 " ) ) {
2009-06-11 13:31:26 +02:00
Log . logConfig ( " COMMAND-STEERING " , " YACY accepted steering command: " + processdescription ) ;
2008-08-02 14:12:04 +02:00
final ByteArrayOutputStream bos = new ByteArrayOutputStream ( ) ;
2008-04-11 00:47:05 +02:00
try {
2009-01-31 02:06:56 +01:00
FileUtils . copyToStream ( new BufferedInputStream ( res . getDataAsStream ( ) ) , new BufferedOutputStream ( bos ) ) ;
2008-04-11 00:47:05 +02:00
} finally {
res . closeStream ( ) ;
}
2005-04-07 21:19:42 +02:00
} else {
2009-06-11 13:31:26 +02:00
Log . logSevere ( " COMMAND-STEERING " , " error response from YACY socket: " + res . getStatusLine ( ) ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
}
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2009-06-11 13:31:26 +02:00
Log . logSevere ( " COMMAND-STEERING " , " could not establish connection to YACY socket: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
2008-04-05 15:17:16 +02:00
} finally {
// release connection
if ( res ! = null ) {
res . closeStream ( ) ;
}
2005-04-07 21:19:42 +02:00
}
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// finished
2009-06-11 13:31:26 +02:00
Log . logConfig ( " COMMAND-STEERING " , " SUCCESSFULLY FINISHED COMMAND: " + processdescription ) ;
2005-04-07 21:19:42 +02:00
}
2009-06-11 13:31:26 +02:00
2005-08-02 21:40:29 +02:00
/ * *
* This method gets all found words and outputs a statistic about the score
* of the words . The output of this method can be used to create stop - word
* lists . This method will be called if you start yacy with the argument
* - genwordstat .
* FIXME : How can stop - word list be created from this output ? What type of
* score is output ?
*
* @param homePath Root - Path where all the information is to be found .
* /
2008-08-02 14:12:04 +02:00
private static void genWordstat ( final File homePath ) {
2005-04-07 21:19:42 +02:00
// start up
System . out . println ( copyright ) ;
System . out . println ( hline ) ;
2005-08-02 21:40:29 +02:00
2005-05-11 11:44:36 +02:00
// load words
2009-01-31 00:33:47 +01:00
Log . logInfo ( " GEN-WORDSTAT " , " loading words... " ) ;
2009-04-16 17:29:00 +02:00
final TreeMap < byte [ ] , String > words = loadWordMap ( new File ( homePath , " yacy.words " ) ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// find all hashes
2009-01-31 00:33:47 +01:00
Log . logInfo ( " GEN-WORDSTAT " , " searching all word-hash databases... " ) ;
2009-07-02 19:01:23 +02:00
final File dbRoot = new File ( homePath , " DATA/INDEX/freeworld/ " ) ;
2008-08-02 14:12:04 +02:00
final enumerateFiles ef = new enumerateFiles ( new File ( dbRoot , " WORDS " ) , true , false , true , true ) ;
2005-04-07 21:19:42 +02:00
File f ;
2009-04-16 17:29:00 +02:00
byte [ ] h ;
final ScoreCluster < byte [ ] > hs = new ScoreCluster < byte [ ] > ( ) ;
2005-04-07 21:19:42 +02:00
while ( ef . hasMoreElements ( ) ) {
2008-06-06 18:01:27 +02:00
f = ef . nextElement ( ) ;
2009-10-11 02:12:19 +02:00
h = f . getName ( ) . substring ( 0 , Word . commonHashLength ) . getBytes ( ) ;
2005-04-07 21:19:42 +02:00
hs . addScore ( h , ( int ) f . length ( ) ) ;
}
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// list the hashes in reverse order
2009-01-31 00:33:47 +01:00
Log . logInfo ( " GEN-WORDSTAT " , " listing words in reverse size order... " ) ;
2005-04-07 21:19:42 +02:00
String w ;
2009-04-16 17:29:00 +02:00
final Iterator < byte [ ] > i = hs . scores ( false ) ;
2005-04-07 21:19:42 +02:00
while ( i . hasNext ( ) ) {
2008-01-23 22:23:17 +01:00
h = i . next ( ) ;
w = words . get ( h ) ;
2005-04-07 21:19:42 +02:00
if ( w = = null ) System . out . print ( " # " + h ) ; else System . out . print ( w ) ;
System . out . println ( " - " + hs . getScore ( h ) ) ;
}
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// finished
2009-01-31 00:33:47 +01:00
Log . logConfig ( " GEN-WORDSTAT " , " FINISHED " ) ;
2005-04-07 21:19:42 +02:00
}
2005-10-13 14:31:32 +02:00
2006-06-09 07:38:59 +02:00
/ * *
* @param homePath path to the YaCy directory
2008-04-05 15:17:16 +02:00
* @param networkName
2006-06-09 07:38:59 +02:00
* /
2008-08-02 14:12:04 +02:00
public static void minimizeUrlDB ( final File homePath , final String networkName ) {
2006-02-21 15:10:00 +01:00
// run with "java -classpath classes yacy -minimizeUrlDB"
2009-01-31 00:33:47 +01:00
try { Log . configureLogging ( homePath , new File ( homePath , " DATA/LOG/yacy.logging " ) ) ; } catch ( final Exception e ) { }
2008-08-02 14:12:04 +02:00
final File indexPrimaryRoot = new File ( homePath , " DATA/INDEX " ) ;
final File indexRoot2 = new File ( homePath , " DATA/INDEX2 " ) ;
2009-01-31 00:33:47 +01:00
final Log log = new Log ( " URL-CLEANUP " ) ;
2005-10-05 12:45:33 +02:00
try {
log . logInfo ( " STARTING URL CLEANUP " ) ;
// db containing all currently loades urls
2009-10-09 16:44:20 +02:00
final MetadataRepository currentUrlDB = new MetadataRepository ( new File ( new File ( indexPrimaryRoot , networkName ) , " TEXT " ) , " text.urlmd " , false , false ) ;
2005-10-05 12:45:33 +02:00
// db used to hold all neede urls
2009-10-09 16:44:20 +02:00
final MetadataRepository minimizedUrlDB = new MetadataRepository ( new File ( new File ( indexRoot2 , networkName ) , " TEXT " ) , " text.urlmd " , false , false ) ;
2005-10-05 12:45:33 +02:00
2009-04-17 00:45:39 +02:00
final int cacheMem = ( int ) ( MemoryControl . maxMemory - MemoryControl . total ( ) ) ;
2006-12-05 03:47:51 +01:00
if ( cacheMem < 2048000 ) throw new OutOfMemoryError ( " Not enough memory available to start clean up. " ) ;
2006-06-09 07:38:59 +02:00
2009-05-28 16:26:05 +02:00
final Segment wordIndex = new Segment (
2009-05-25 08:59:21 +02:00
log ,
2009-05-28 16:26:05 +02:00
new File ( new File ( indexPrimaryRoot , " freeworld " ) , " TEXT " ) ,
2009-05-25 08:59:21 +02:00
10000 ,
2009-09-07 22:30:57 +02:00
( long ) Integer . MAX_VALUE , false , false ) ;
2009-05-29 12:03:35 +02:00
final Iterator < ReferenceContainer < WordReference > > indexContainerIterator = wordIndex . termIndex ( ) . references ( " AAAAAAAAAAAA " . getBytes ( ) , false , false ) ;
2005-10-05 12:45:33 +02:00
long urlCounter = 0 , wordCounter = 0 ;
long wordChunkStart = System . currentTimeMillis ( ) , wordChunkEnd = 0 ;
2007-01-31 10:22:22 +01:00
String wordChunkStartHash = " AAAAAAAAAAAA " , wordChunkEndHash ;
2005-10-05 12:45:33 +02:00
2006-07-26 13:21:51 +02:00
while ( indexContainerIterator . hasNext ( ) ) {
2009-04-15 08:34:27 +02:00
ReferenceContainer < WordReference > wordIdxContainer = null ;
2005-10-05 12:45:33 +02:00
try {
wordCounter + + ;
2008-01-23 22:23:17 +01:00
wordIdxContainer = indexContainerIterator . next ( ) ;
2005-10-05 12:45:33 +02:00
// the combined container will fit, read the container
2009-04-15 08:34:27 +02:00
final Iterator < WordReference > wordIdxEntries = wordIdxContainer . entries ( ) ;
2009-03-02 00:58:14 +01:00
Reference iEntry ;
2005-10-13 15:57:15 +02:00
while ( wordIdxEntries . hasNext ( ) ) {
2008-06-06 18:01:27 +02:00
iEntry = wordIdxEntries . next ( ) ;
2009-04-07 11:34:41 +02:00
final String urlHash = iEntry . metadataHash ( ) ;
2005-12-15 11:31:00 +01:00
if ( ( currentUrlDB . exists ( urlHash ) ) & & ( ! minimizedUrlDB . exists ( urlHash ) ) ) try {
2009-10-11 02:12:19 +02:00
final URIMetadataRow urlEntry = currentUrlDB . load ( urlHash , null , 0 ) ;
2005-12-15 11:31:00 +01:00
urlCounter + + ;
2006-10-16 17:04:16 +02:00
minimizedUrlDB . store ( urlEntry ) ;
2005-10-05 12:45:33 +02:00
if ( urlCounter % 500 = = 0 ) {
log . logInfo ( urlCounter + " URLs found so far. " ) ;
}
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) { }
2005-10-05 12:45:33 +02:00
}
if ( wordCounter % 500 = = 0 ) {
2009-04-18 16:35:18 +02:00
wordChunkEndHash = wordIdxContainer . getTermHashAsString ( ) ;
2005-10-05 12:45:33 +02:00
wordChunkEnd = System . currentTimeMillis ( ) ;
2008-08-02 14:12:04 +02:00
final long duration = wordChunkEnd - wordChunkStart ;
2005-10-05 12:45:33 +02:00
log . logInfo ( wordCounter + " words scanned " +
" [ " + wordChunkStartHash + " .. " + wordChunkEndHash + " ] \ n " +
" Duration: " + 500 * 1000 / duration + " words/s " +
2009-01-30 16:33:00 +01:00
" | Free memory: " + MemoryControl . free ( ) +
" | Total memory: " + MemoryControl . total ( ) ) ;
2005-10-05 12:45:33 +02:00
wordChunkStart = wordChunkEnd ;
wordChunkStartHash = wordChunkEndHash ;
}
2006-07-26 13:21:51 +02:00
// we have read all elements, now we can close it
wordIdxContainer = null ;
2005-10-05 12:45:33 +02:00
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2006-04-20 10:20:12 +02:00
log . logSevere ( " Exception " , e ) ;
2005-10-05 12:45:33 +02:00
} finally {
2008-08-02 14:12:04 +02:00
if ( wordIdxContainer ! = null ) try { wordIdxContainer = null ; } catch ( final Exception e ) { }
2005-10-05 12:45:33 +02:00
}
}
2006-10-22 09:09:45 +02:00
log . logInfo ( " current LURL DB contains " + currentUrlDB . size ( ) + " entries. " ) ;
log . logInfo ( " mimimized LURL DB contains " + minimizedUrlDB . size ( ) + " entries. " ) ;
2005-10-05 12:45:33 +02:00
currentUrlDB . close ( ) ;
minimizedUrlDB . close ( ) ;
2006-12-05 03:47:51 +01:00
wordIndex . close ( ) ;
2005-10-05 12:45:33 +02:00
2006-06-09 07:38:59 +02:00
// TODO: rename the mimimized UrlDB to the name of the previous UrlDB
2005-10-05 12:45:33 +02:00
log . logInfo ( " FINISHED URL CLEANUP, WAIT FOR DUMP " ) ;
2006-09-14 12:12:41 +02:00
log . logInfo ( " You can now backup your old URL DB and rename minimized/urlHash.db to urlHash.db " ) ;
2006-06-09 07:38:59 +02:00
2005-10-05 12:45:33 +02:00
log . logInfo ( " TERMINATED URL CLEANUP " ) ;
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2006-06-09 07:38:59 +02:00
log . logSevere ( " Exception: " + e . getMessage ( ) , e ) ;
2008-08-02 14:12:04 +02:00
} catch ( final Error e ) {
2006-06-09 07:38:59 +02:00
log . logSevere ( " Error: " + e . getMessage ( ) , e ) ;
2005-10-05 12:45:33 +02:00
}
}
2005-08-02 21:40:29 +02:00
/ * *
2009-04-16 17:29:00 +02:00
* Reads all words from the given file and creates a treemap , where key is
2005-08-02 21:40:29 +02:00
* the plasma word hash and value is the word itself .
*
* @param wordlist File where the words are stored .
* @return HashMap with the hash - word - relation .
* /
2009-04-16 17:29:00 +02:00
private static TreeMap < byte [ ] , String > loadWordMap ( final File wordlist ) {
2005-05-11 11:44:36 +02:00
// returns a hash-word - Relation
2009-04-16 17:29:00 +02:00
final TreeMap < byte [ ] , String > wordmap = new TreeMap < byte [ ] , String > ( Base64Order . enhancedCoder ) ;
2005-04-07 21:19:42 +02:00
try {
String word ;
2008-08-02 14:12:04 +02:00
final BufferedReader br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( wordlist ) ) ) ;
2009-03-02 00:58:14 +01:00
while ( ( word = br . readLine ( ) ) ! = null ) wordmap . put ( Word . word2hash ( word ) , word ) ;
2005-04-07 21:19:42 +02:00
br . close ( ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) { }
2005-04-07 21:19:42 +02:00
return wordmap ;
}
2005-08-02 21:40:29 +02:00
/ * *
* Cleans a wordlist in a file according to the length of the words . The
* file with the given filename is read and then only the words in the given
* length - range are written back to the file .
*
* @param wordlist Name of the file the words are stored in .
* @param minlength Minimal needed length for each word to be stored .
* @param maxlength Maximal allowed length for each word to be stored .
* /
2008-08-02 14:12:04 +02:00
private static void cleanwordlist ( final String wordlist , final int minlength , final int maxlength ) {
2005-04-07 21:19:42 +02:00
// start up
System . out . println ( copyright ) ;
System . out . println ( hline ) ;
2009-01-31 00:33:47 +01:00
Log . logConfig ( " CLEAN-WORDLIST " , " START " ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
String word ;
2008-08-02 14:12:04 +02:00
final TreeSet < String > wordset = new TreeSet < String > ( ) ;
2005-04-07 21:19:42 +02:00
int count = 0 ;
try {
2008-08-02 14:12:04 +02:00
final BufferedReader br = new BufferedReader ( new InputStreamReader ( new FileInputStream ( wordlist ) ) ) ;
final String seps = " ' .,:/-& " ;
2005-04-07 21:19:42 +02:00
while ( ( word = br . readLine ( ) ) ! = null ) {
word = word . toLowerCase ( ) . trim ( ) ;
2005-05-11 11:44:36 +02:00
for ( int i = 0 ; i < seps . length ( ) ; i + + ) {
2005-04-07 21:19:42 +02:00
if ( word . indexOf ( seps . charAt ( i ) ) > = 0 ) word = word . substring ( 0 , word . indexOf ( seps . charAt ( i ) ) ) ;
2005-05-11 11:44:36 +02:00
}
2005-04-07 21:19:42 +02:00
if ( ( word . length ( ) > = minlength ) & & ( word . length ( ) < = maxlength ) ) wordset . add ( word ) ;
count + + ;
}
br . close ( ) ;
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
if ( wordset . size ( ) ! = count ) {
count = count - wordset . size ( ) ;
2008-08-02 14:12:04 +02:00
final BufferedWriter bw = new BufferedWriter ( new PrintWriter ( new FileWriter ( wordlist ) ) ) ;
2009-12-02 01:37:59 +01:00
while ( ! wordset . isEmpty ( ) ) {
2008-06-06 18:01:27 +02:00
word = wordset . first ( ) ;
2005-04-07 21:19:42 +02:00
bw . write ( word + " \ n " ) ;
wordset . remove ( word ) ;
}
bw . close ( ) ;
2009-01-31 00:33:47 +01:00
Log . logInfo ( " CLEAN-WORDLIST " , " shrinked wordlist by " + count + " words. " ) ;
2005-04-07 21:19:42 +02:00
} else {
2009-01-31 00:33:47 +01:00
Log . logInfo ( " CLEAN-WORDLIST " , " not necessary to change wordlist " ) ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " CLEAN-WORDLIST " , " ERROR: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
System . exit ( - 1 ) ;
}
2005-08-02 21:40:29 +02:00
2005-04-07 21:19:42 +02:00
// finished
2009-01-31 00:33:47 +01:00
Log . logConfig ( " CLEAN-WORDLIST " , " FINISHED " ) ;
2005-04-07 21:19:42 +02:00
}
2005-08-02 21:40:29 +02:00
2008-08-02 14:12:04 +02:00
private static void transferCR ( final String targetaddress , final String crfile ) {
final File f = new File ( crfile ) ;
2005-11-11 00:48:20 +01:00
try {
2009-01-31 02:06:56 +01:00
final byte [ ] b = FileUtils . read ( f ) ;
2008-08-02 14:12:04 +02:00
final String result = yacyClient . transfer ( targetaddress , f . getName ( ) , b ) ;
2005-11-11 00:48:20 +01:00
if ( result = = null )
2009-01-31 00:33:47 +01:00
Log . logInfo ( " TRANSFER-CR " , " transmitted file " + crfile + " to " + targetaddress + " successfully " ) ;
2005-11-11 00:48:20 +01:00
else
2009-01-31 00:33:47 +01:00
Log . logInfo ( " TRANSFER-CR " , " error transmitting file " + crfile + " to " + targetaddress + " : " + result ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2009-01-31 00:33:47 +01:00
Log . logInfo ( " TRANSFER-CR " , " could not read file " + crfile ) ;
2005-11-11 00:48:20 +01:00
}
}
2005-12-07 02:40:52 +01:00
2008-08-02 14:12:04 +02:00
private static String [ ] shift ( final String [ ] args , final int pos , final int count ) {
final String [ ] newargs = new String [ args . length - count ] ;
2005-12-07 02:40:52 +01:00
System . arraycopy ( args , 0 , newargs , 0 , pos ) ;
System . arraycopy ( args , pos + count , newargs , pos , args . length - pos - count ) ;
return newargs ;
}
2005-12-07 12:10:08 +01:00
/ * *
* Uses an Iteration over urlHash . db to detect malformed URL - Entries .
* Damaged URL - Entries will be marked in a HashSet and removed at the end of the function .
*
* @param homePath Root - Path where all information is to be found .
* /
2008-08-02 14:12:04 +02:00
private static void urldbcleanup ( final File homePath , final String networkName ) {
final File root = homePath ;
final File indexroot = new File ( root , " DATA/INDEX " ) ;
2009-01-31 00:33:47 +01:00
try { Log . configureLogging ( homePath , new File ( homePath , " DATA/LOG/yacy.logging " ) ) ; } catch ( final Exception e ) { }
2009-10-09 16:44:20 +02:00
final MetadataRepository currentUrlDB = new MetadataRepository ( new File ( new File ( indexroot , networkName ) , " TEXT " ) , " text.urlmd " , false , false ) ;
2008-03-26 15:13:05 +01:00
currentUrlDB . deadlinkCleaner ( null ) ;
2007-03-09 09:48:47 +01:00
currentUrlDB . close ( ) ;
2005-12-07 12:10:08 +01:00
}
2008-08-02 14:12:04 +02:00
private static void RWIHashList ( final File homePath , final String targetName , final String resource , final String format ) {
2009-05-28 16:26:05 +02:00
Segment WordIndex = null ;
2009-01-31 00:33:47 +01:00
final Log log = new Log ( " HASHLIST " ) ;
2008-08-02 14:12:04 +02:00
final File indexPrimaryRoot = new File ( homePath , " DATA/INDEX " ) ;
final String wordChunkStartHash = " AAAAAAAAAAAA " ;
2009-01-31 00:33:47 +01:00
try { Log . configureLogging ( homePath , new File ( homePath , " DATA/LOG/yacy.logging " ) ) ; } catch ( final Exception e ) { }
2006-01-04 14:55:45 +01:00
log . logInfo ( " STARTING CREATION OF RWI-HASHLIST " ) ;
2008-08-02 14:12:04 +02:00
final File root = homePath ;
2006-01-04 14:55:45 +01:00
try {
2009-04-15 08:34:27 +02:00
Iterator < ReferenceContainer < WordReference > > indexContainerIterator = null ;
2006-01-10 02:04:22 +01:00
if ( resource . equals ( " all " ) ) {
2009-05-28 16:26:05 +02:00
WordIndex = new Segment (
log ,
new File ( new File ( indexPrimaryRoot , " freeworld " ) , " TEXT " ) ,
2009-05-25 08:59:21 +02:00
10000 ,
2009-09-07 22:30:57 +02:00
( long ) Integer . MAX_VALUE , false , false ) ;
2009-05-29 12:03:35 +02:00
indexContainerIterator = WordIndex . termIndex ( ) . references ( wordChunkStartHash . getBytes ( ) , false , false ) ;
2006-12-05 03:47:51 +01:00
}
2006-01-04 14:55:45 +01:00
int counter = 0 ;
2009-04-15 08:34:27 +02:00
ReferenceContainer < WordReference > container = null ;
2006-01-15 11:29:48 +01:00
if ( format . equals ( " zip " ) ) {
log . logInfo ( " Writing Hashlist to ZIP-file: " + targetName + " .zip " ) ;
2008-08-02 14:12:04 +02:00
final ZipEntry zipEntry = new ZipEntry ( targetName + " .txt " ) ;
final File file = new File ( root , targetName + " .zip " ) ;
final ZipOutputStream bos = new ZipOutputStream ( new FileOutputStream ( file ) ) ;
2006-01-15 11:29:48 +01:00
bos . putNextEntry ( zipEntry ) ;
2008-06-06 18:01:27 +02:00
if ( indexContainerIterator ! = null ) {
while ( indexContainerIterator . hasNext ( ) ) {
counter + + ;
container = indexContainerIterator . next ( ) ;
2009-04-16 17:29:00 +02:00
bos . write ( container . getTermHash ( ) ) ;
2008-06-06 18:01:27 +02:00
bos . write ( serverCore . CRLF ) ;
if ( counter % 500 = = 0 ) {
2009-04-18 16:35:18 +02:00
log . logInfo ( " Found " + counter + " Hashs until now. Last found Hash: " + container . getTermHashAsString ( ) ) ;
2008-06-06 18:01:27 +02:00
}
2006-01-15 11:29:48 +01:00
}
}
2007-01-31 10:22:22 +01:00
bos . flush ( ) ;
2006-01-15 11:29:48 +01:00
bos . close ( ) ;
2006-07-26 13:21:51 +02:00
} else {
2006-01-15 11:29:48 +01:00
log . logInfo ( " Writing Hashlist to TXT-file: " + targetName + " .txt " ) ;
2008-08-02 14:12:04 +02:00
final File file = new File ( root , targetName + " .txt " ) ;
final BufferedOutputStream bos = new BufferedOutputStream ( new FileOutputStream ( file ) ) ;
2008-06-06 18:01:27 +02:00
if ( indexContainerIterator ! = null ) {
while ( indexContainerIterator . hasNext ( ) ) {
counter + + ;
container = indexContainerIterator . next ( ) ;
2009-04-16 17:29:00 +02:00
bos . write ( container . getTermHash ( ) ) ;
2008-06-06 18:01:27 +02:00
bos . write ( serverCore . CRLF ) ;
if ( counter % 500 = = 0 ) {
2009-04-18 16:35:18 +02:00
log . logInfo ( " Found " + counter + " Hashs until now. Last found Hash: " + container . getTermHashAsString ( ) ) ;
2008-06-06 18:01:27 +02:00
}
2006-01-15 11:29:48 +01:00
}
2006-01-04 14:55:45 +01:00
}
2007-01-31 10:22:22 +01:00
bos . flush ( ) ;
2006-01-15 11:29:48 +01:00
bos . close ( ) ;
2006-01-04 14:55:45 +01:00
}
2009-04-18 16:35:18 +02:00
log . logInfo ( " Total number of Hashs: " + counter + " . Last found Hash: " + ( container = = null ? " null " : container . getTermHashAsString ( ) ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IOException e ) {
2006-04-20 10:20:12 +02:00
log . logSevere ( " IOException " , e ) ;
2006-01-10 02:04:22 +01:00
}
2006-01-14 00:59:04 +01:00
if ( WordIndex ! = null ) {
2006-12-05 03:47:51 +01:00
WordIndex . close ( ) ;
2006-01-14 00:59:04 +01:00
WordIndex = null ;
}
2006-01-04 14:55:45 +01:00
}
2006-01-30 09:28:22 +01:00
/ * *
2007-07-19 17:32:10 +02:00
* Searching for peers affected by Bug
2006-01-30 09:28:22 +01:00
* @param homePath
* /
2008-08-02 14:12:04 +02:00
public static void testPeerDB ( final File homePath ) {
2006-01-30 09:28:22 +01:00
try {
2008-08-02 14:12:04 +02:00
final File yacyDBPath = new File ( homePath , " DATA/INDEX/freeworld/NETWORK " ) ;
2006-01-30 09:28:22 +01:00
2008-08-02 14:12:04 +02:00
final String [ ] dbFileNames = { " seed.new.db " , " seed.old.db " , " seed.pot.db " } ;
2006-01-30 09:28:22 +01:00
for ( int i = 0 ; i < dbFileNames . length ; i + + ) {
2008-08-02 14:12:04 +02:00
final File dbFile = new File ( yacyDBPath , dbFileNames [ i ] ) ;
2010-01-13 17:21:37 +01:00
final MapDataMining db = new MapDataMining ( dbFile , Word . commonHashLength , Base64Order . enhancedCoder , 1024 * 512 , 500 , yacySeedDB . sortFields , yacySeedDB . longaccFields , yacySeedDB . doubleaccFields , null , null ) ;
2006-01-30 09:28:22 +01:00
2009-01-30 23:44:20 +01:00
MapDataMining . mapIterator it ;
2006-01-30 09:28:22 +01:00
it = db . maps ( true , false ) ;
while ( it . hasNext ( ) ) {
2008-08-02 14:12:04 +02:00
final Map < String , String > dna = it . next ( ) ;
2008-06-06 18:01:27 +02:00
String peerHash = dna . get ( " key " ) ;
2009-10-11 02:12:19 +02:00
if ( peerHash . length ( ) < Word . commonHashLength ) {
2008-08-02 14:12:04 +02:00
final String peerName = dna . get ( " Name " ) ;
final String peerIP = dna . get ( " IP " ) ;
final String peerPort = dna . get ( " Port " ) ;
2006-01-30 09:28:22 +01:00
2009-10-11 02:12:19 +02:00
while ( peerHash . length ( ) < Word . commonHashLength ) { peerHash = peerHash + " _ " ; }
2006-01-30 09:31:14 +01:00
System . err . println ( " Invalid Peer-Hash found in ' " + dbFileNames [ i ] + " ': " + peerName + " : " + peerHash + " , http:// " + peerIP + " : " + peerPort ) ;
2006-01-30 09:28:22 +01:00
}
}
db . close ( ) ;
}
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2009-11-05 21:28:37 +01:00
Log . logException ( e ) ;
2006-01-30 09:28:22 +01:00
}
}
2006-01-10 02:04:22 +01:00
2005-08-02 21:40:29 +02:00
/ * *
2005-12-11 01:25:02 +01:00
* Main - method which is started by java . Checks for special arguments or
* starts up the application .
*
* @param args
* Given arguments from the command line .
* /
2005-04-07 21:19:42 +02:00
public static void main ( String args [ ] ) {
2005-10-12 14:34:08 +02:00
2006-10-23 02:59:55 +02:00
// check assertion status
//ClassLoader.getSystemClassLoader().setDefaultAssertionStatus(true);
boolean assertionenabled = false ;
assert assertionenabled = true ;
if ( assertionenabled ) System . out . println ( " Asserts are enabled " ) ;
2005-10-12 14:34:08 +02:00
// check memory amount
2005-09-21 02:12:37 +02:00
System . gc ( ) ;
2009-01-30 16:33:00 +01:00
final long startupMemFree = MemoryControl . free ( ) ;
final long startupMemTotal = MemoryControl . total ( ) ;
2006-08-18 03:33:54 +02:00
2005-10-12 14:34:08 +02:00
// go into headless awt mode
System . setProperty ( " java.awt.headless " , " true " ) ;
2005-09-21 14:21:01 +02:00
2008-02-01 00:40:47 +01:00
File applicationRoot = new File ( System . getProperty ( " user.dir " ) . replace ( '\\' , '/' ) ) ;
2005-07-01 01:19:08 +02:00
//System.out.println("args.length=" + args.length);
//System.out.print("args=["); for (int i = 0; i < args.length; i++) System.out.print(args[i] + ", "); System.out.println("]");
2006-07-19 13:20:22 +02:00
if ( ( args . length > = 1 ) & & ( ( args [ 0 ] . toLowerCase ( ) . equals ( " -startup " ) ) | | ( args [ 0 ] . equals ( " -start " ) ) ) ) {
2005-04-07 21:19:42 +02:00
// normal start-up of yacy
2008-02-01 00:40:47 +01:00
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
2005-09-21 14:21:01 +02:00
startup ( applicationRoot , startupMemFree , startupMemTotal ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( ( args [ 0 ] . toLowerCase ( ) . equals ( " -shutdown " ) ) | | ( args [ 0 ] . equals ( " -stop " ) ) ) ) {
2005-04-07 21:19:42 +02:00
// normal shutdown of yacy
2008-02-01 00:40:47 +01:00
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
2005-04-07 21:19:42 +02:00
shutdown ( applicationRoot ) ;
2009-06-11 13:31:26 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -update " ) ) ) {
// aut-update yacy
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
update ( applicationRoot ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -minimizeurldb " ) ) ) {
2005-10-05 12:45:33 +02:00
// migrate words from DATA/PLASMADB/WORDS path to assortment cache, if possible
// attention: this may run long and should not be interrupted!
2006-07-19 13:20:22 +02:00
if ( args . length > = 3 & & args [ 1 ] . toLowerCase ( ) . equals ( " -cache " ) ) {
2006-04-20 10:20:12 +02:00
args = shift ( args , 1 , 2 ) ;
}
2008-02-01 00:40:47 +01:00
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
2008-03-16 23:31:54 +01:00
minimizeUrlDB ( applicationRoot , " freeworld " ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -testpeerdb " ) ) ) {
2006-01-30 09:28:22 +01:00
if ( args . length = = 2 ) {
2008-02-01 00:40:47 +01:00
applicationRoot = new File ( args [ 1 ] ) ;
2006-01-30 09:28:22 +01:00
} else if ( args . length > 2 ) {
System . err . println ( " Usage: -testPeerDB [homeDbRoot] " ) ;
}
testPeerDB ( applicationRoot ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -genwordstat " ) ) ) {
2005-04-07 21:19:42 +02:00
// this can help to create a stop-word list
2005-05-11 11:44:36 +02:00
// to use this, you need a 'yacy.words' file in the root path
// start this with "java -classpath classes yacy -genwordstat [<rootdir>]"
2008-02-01 00:40:47 +01:00
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
2005-04-07 21:19:42 +02:00
genWordstat ( applicationRoot ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length = = 4 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -cleanwordlist " ) ) ) {
2005-04-07 21:19:42 +02:00
// this can be used to organize and clean a word-list
2005-05-11 11:44:36 +02:00
// start this with "java -classpath classes yacy -cleanwordlist <word-file> <minlength> <maxlength>"
2008-08-02 14:12:04 +02:00
final int minlength = Integer . parseInt ( args [ 2 ] ) ;
final int maxlength = Integer . parseInt ( args [ 3 ] ) ;
2005-04-07 21:19:42 +02:00
cleanwordlist ( args [ 1 ] , minlength , maxlength ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -transfercr " ) ) ) {
2005-11-11 00:48:20 +01:00
// transfer a single cr file to a remote peer
2008-08-02 14:12:04 +02:00
final String targetaddress = args [ 1 ] ;
final String crfile = args [ 2 ] ;
2005-11-21 02:30:30 +01:00
transferCR ( targetaddress , crfile ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -urldbcleanup " ) ) ) {
2005-12-07 12:10:08 +01:00
// generate a url list and save it in a file
2008-02-01 00:40:47 +01:00
if ( args . length = = 2 ) applicationRoot = new File ( args [ 1 ] ) ;
2008-03-16 23:31:54 +01:00
urldbcleanup ( applicationRoot , " freeworld " ) ;
2006-07-19 13:20:22 +02:00
} else if ( ( args . length > = 1 ) & & ( args [ 0 ] . toLowerCase ( ) . equals ( " -rwihashlist " ) ) ) {
2006-01-04 14:55:45 +01:00
// generate a url list and save it in a file
2006-01-10 02:04:22 +01:00
String domain = " all " ;
2006-01-15 11:29:48 +01:00
String format = " txt " ;
2006-01-10 02:04:22 +01:00
if ( args . length > = 2 ) domain = args [ 1 ] ;
2006-01-15 11:29:48 +01:00
if ( args . length > = 3 ) format = args [ 2 ] ;
2008-02-01 00:40:47 +01:00
if ( args . length = = 4 ) applicationRoot = new File ( args [ 3 ] ) ;
2008-08-02 14:12:04 +02:00
final String outfile = " rwihashlist_ " + System . currentTimeMillis ( ) ;
2006-01-15 11:29:48 +01:00
RWIHashList ( applicationRoot , outfile , domain , format ) ;
2005-04-07 21:19:42 +02:00
} else {
2008-02-01 00:40:47 +01:00
if ( args . length = = 1 ) applicationRoot = new File ( args [ 0 ] ) ;
2005-09-21 14:21:01 +02:00
startup ( applicationRoot , startupMemFree , startupMemTotal ) ;
2005-04-07 21:19:42 +02:00
}
}
}
2005-05-11 11:44:36 +02:00
2005-08-02 21:40:29 +02:00
/ * *
* This class is a helper class whose instance is started , when the java virtual
* machine shuts down . Signals the plasmaSwitchboard to shut down .
* /
2005-07-03 14:40:36 +02:00
class shutdownHookThread extends Thread {
2010-01-11 23:18:38 +01:00
private final Switchboard sb ;
private final Thread mainThread ;
2005-08-02 21:40:29 +02:00
2009-07-19 22:37:44 +02:00
public shutdownHookThread ( final Thread mainThread , final Switchboard sb ) {
2005-09-20 17:36:22 +02:00
super ( ) ;
2005-09-21 02:12:37 +02:00
this . sb = sb ;
2005-05-11 11:44:36 +02:00
this . mainThread = mainThread ;
}
2005-08-02 21:40:29 +02:00
2005-07-03 14:40:36 +02:00
public void run ( ) {
2005-05-11 11:44:36 +02:00
try {
2005-09-21 02:12:37 +02:00
if ( ! this . sb . isTerminated ( ) ) {
2009-01-31 00:33:47 +01:00
Log . logConfig ( " SHUTDOWN " , " Shutdown via shutdown hook. " ) ;
2005-08-02 21:40:29 +02:00
2005-05-11 11:44:36 +02:00
// sending the yacy main thread a shutdown signal
2009-01-31 00:33:47 +01:00
Log . logFine ( " SHUTDOWN " , " Signaling shutdown to the switchboard. " ) ;
2009-12-08 15:25:51 +01:00
this . sb . terminate ( " shutdown hook " ) ;
2005-08-02 21:40:29 +02:00
2005-05-11 11:44:36 +02:00
// waiting for the yacy thread to finish execution
2009-01-31 00:33:47 +01:00
Log . logFine ( " SHUTDOWN " , " Waiting for main thread to finish. " ) ;
2006-09-20 12:13:23 +02:00
if ( this . mainThread . isAlive ( ) & & ! this . sb . isTerminated ( ) ) {
this . mainThread . join ( ) ;
}
2005-05-11 11:44:36 +02:00
}
2008-08-02 14:12:04 +02:00
} catch ( final Exception e ) {
2009-01-31 00:33:47 +01:00
Log . logSevere ( " SHUTDOWN " , " Unexpected error. " + e . getClass ( ) . getName ( ) , e ) ;
2005-05-11 11:44:36 +02:00
}
}
2005-09-22 20:54:36 +02:00
}