yacy_search_server/source/yacy.java

// yacy.java
// -----------------------
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.yacy.net
// Frankfurt, Germany, 2004, 2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import de.anomic.data.translator;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.http.httpd;
import de.anomic.http.httpdFileHandler;
import de.anomic.http.httpdProxyHandler;
import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntry;
import de.anomic.index.indexRWIEntryNew;
import de.anomic.index.indexURLEntry;
import de.anomic.index.indexURLEntryOld;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroDyn;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroMapObjects;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaWordIndexAssortment;
import de.anomic.plasma.plasmaWordIndexFile;
import de.anomic.plasma.plasmaWordIndexFileCluster;
import de.anomic.plasma.dbImport.AssortmentImporter;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverMemory;
import de.anomic.server.serverSystem;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.enumerateFiles;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeedDB;

/**
* This is the main class of YaCy. Several threads are started from here:
* <ul>
* <li>one single instance of the plasmaSwitchboard is generated, which itself
* starts a thread with a plasmaHTMLCache object. This object simply counts
* files sizes in the cache and terminates them. It also generates a
* plasmaCrawlerLoader object, which may itself start some more httpc-calling
* threads to load web pages. They terminate automatically when a page has
* loaded.
* <li>one serverCore - thread is started, which implements a multi-threaded
* server. The process may start itself many more processes that handle
* connections.lo
* <li>finally, all idle-dependent processes are written in a queue in
* plasmaSwitchboard which are worked off inside an idle-sensitive loop of the
* main process. (here)
* </ul>
*
* On termination, the following must be done:
* <ul>
* <li>stop feeding of the crawling process because it othervise fills the
* indexing queue.
* <li>say goodbye to connected peers and disable new connections. Don't wait for
* success.
* <li>first terminate the serverCore thread. This prevents that new cache
* objects are queued.
* <li>wait that the plasmaHTMLCache terminates (it should be normal that this
* process already has terminated).
* <li>then wait for termination of all loader process of the
* plasmaCrawlerLoader.
* <li>work off the indexing and cache storage queue. These values are inside a
* RAM cache and would be lost otherwise.
* <li>write all settings.
* <li>terminate.
* </ul>
*/

public final class yacy {
    // static objects
    private static String vString = "@REPL_VERSION@";
    private static double version = 0.1;

    private static final String vDATE   = "@REPL_DATE@";
    private static final String copyright = "[ YaCy v" + vString + ", build " + vDATE + " by Michael Christen / www.yacy.net ]";
    private static final String hline = "-------------------------------------------------------------------------------";
   
    /**
    * Converts combined version-string to a pretty string, e.g. "0.435/01818" or "dev/01818" (development version) or "dev/00000" (in case of wrong input)
    *
    * @param ver Combined version string matching regular expression:  "\A(\d+\.\d{3})(\d{4}|\d{5})\z" <br>
    *  (i.e.: start of input, 1 or more digits in front of decimal point, decimal point followed by 3 digits as major version, 4 or 5 digits for SVN-Version, end of input) 
    * @return If the major version is &lt; 0.11  - major version is separated from SVN-version by '/', e.g. "0.435/01818" <br>
    *         If the major version is &gt;= 0.11 - major version is replaced by "dev" and separated SVN-version by '/', e.g."dev/01818" <br> 
    *         "dev/00000" - If the input does not matcht the regular expression above 
    */
    public static String combined2prettyVersion(String ver) {
        return combined2prettyVersion(ver, "");
    }
    public static String combined2prettyVersion(String ver, String computerName) {
        final Matcher matcher = Pattern.compile("\\A(\\d+\\.\\d{1,3})(\\d{0,5})\\z").matcher(ver); 

        if (!matcher.find()) { 
            serverLog.logWarning("STARTUP", "Peer '"+computerName+"': wrong format of version-string: '" + ver + "'. Using default string 'dev/00000' instead");   
            return "dev/00000";
        }
        
        String mainversion = (Double.parseDouble(matcher.group(1)) < 0.11 ? "dev" : matcher.group(1));
		String revision = matcher.group(2);
		for(int i=revision.length();i<5;++i) revision += "0";
		return mainversion+"/"+revision;
    }
       
    /**
    * Combines the version of YaCy with the versionnumber from SVN to a
    * combined version
    *
    * @param version Current given version.
    * @param svn Current version given from SVN.
    * @return String with the combined version.
    */
    public static double versvn2combinedVersion(double v, int svn) {
    	return (Math.rint((v*100000000.0) + ((double)svn))/100000000);
    }

    /**
    * Starts up the whole application. Sets up all datastructures and starts
    * the main threads.
    *
    * @param homePath Root-path where all information is to be found.
    * @param startupFree free memory at startup time, to be used later for statistics
    */
    private static void startup(String homePath, long startupMemFree, long startupMemTotal) {
        long startup = System.currentTimeMillis();

        int oldRev=0;
        int newRev=0;

        try {
            // start up
            System.out.println(copyright);
            System.out.println(hline);

            // check java version
            try {
                /*String[] check =*/ "a,b".split(","); // split needs java 1.4
            } catch (NoSuchMethodError e) {
                System.err.println("STARTUP: Java Version too low. You need at least Java 1.4.2 to run YaCy");
                Thread.sleep(3000);
                System.exit(-1);
            }
            
            // ensure that there is a DATA directory, if not, create one and if that fails warn and die
            File f = new File(homePath); if (!(f.exists())) f.mkdirs();
            f = new File(homePath, "DATA/"); if (!(f.exists())) f.mkdirs();
			if (!(f.exists())) { 
				System.err.println("Error creating DATA-directory in " + homePath.toString() + " . Please check your write-permission for this folder. YaCy will now terminate."); 
				System.exit(-1); 
			}
			
			f = new File(homePath, "DATA/yacy.running");
			if (!f.exists()) f.createNewFile(); f.deleteOnExit();
            
            // setting up logging
            f = new File(homePath, "DATA/LOG/"); if (!(f.exists())) f.mkdirs();
            if (!((new File(homePath, "DATA/LOG/yacy.logging")).exists())) try {
                serverFileUtils.copy(new File(homePath, "yacy.logging"), new File(homePath, "DATA/LOG/yacy.logging"));
            }catch (IOException e){
                System.out.println("could not copy yacy.logging");
            }
            try{
                serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));
            } catch (IOException e) {
                System.out.println("could not find logging properties in homePath=" + homePath);
                e.printStackTrace();
            }
            serverLog.logConfig("STARTUP", "java version " + System.getProperty("java.version", "no-java-version"));
            serverLog.logConfig("STARTUP", "Application Root Path: " + homePath);
            serverLog.logConfig("STARTUP", "Time Zone: UTC" + serverDate.UTCDiffString() + "; UTC+0000 is " + System.currentTimeMillis());
            serverLog.logConfig("STARTUP", "Maximum file system path length: " + serverSystem.maxPathLength);

            /*
                // Testing if the yacy archive file were unzipped correctly.
                // This test is needed because of classfile-names longer than 100 chars
                // which could cause problems with incompatible unzip software.
                // See:
                // - http://www.yacy-forum.de/viewtopic.php?t=1763
                // - http://www.yacy-forum.de/viewtopic.php?t=715
                // - http://www.yacy-forum.de/viewtopic.php?t=1674
                File unzipTest = new File(homePath,"doc/This_is_a_test_if_the_archive_file_containing_YaCy_was_unpacked_correctly_If_not_please_use_gnu_tar_instead.txt");
                if (!unzipTest.exists()) {
                    String errorMsg = "The archive file containing YaCy was not unpacked correctly. " +
                                      "Please use 'GNU-Tar' or upgrade to a newer version of your unzip software.\n" +
                                      "For detailed information on this bug see: " + 
                                      "http://www.yacy-forum.de/viewtopic.php?t=715";
                    System.err.println(errorMsg);
                    serverLog.logSevere("STARTUP", errorMsg);
                    System.exit(1); 
                }
            */                    
            
            final plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
            
            // save information about available memory at startup time
            sb.setConfig("memoryFreeAfterStartup", startupMemFree);
            sb.setConfig("memoryTotalAfterStartup", startupMemTotal);
            
            // hardcoded, forced, temporary value-migration
            sb.setConfig("htTemplatePath", "htroot/env/templates");
            sb.setConfig("parseableExt", "html,htm,txt,php,shtml,asp");

            // if we are running an SVN version, we try to detect the used svn revision now ...
            final Properties buildProp = new Properties();
            File buildPropFile = null;
            try {
                buildPropFile = new File(homePath,"build.properties");
                buildProp.load(new FileInputStream(buildPropFile));
            } catch (Exception e) {
                serverLog.logWarning("STARTUP", buildPropFile.toString() + " not found in settings path");
            }
            
            oldRev=Integer.parseInt(sb.getConfig("svnRevision", "0"));
            try {
                if (buildProp.containsKey("releaseNr")) {
                    // this normally looks like this: $Revision$
                    final String svnReleaseNrStr = buildProp.getProperty("releaseNr");
                    final Pattern pattern = Pattern.compile("\\$Revision:\\s(.*)\\s\\$",Pattern.DOTALL+Pattern.CASE_INSENSITIVE);
                    final Matcher matcher = pattern.matcher(svnReleaseNrStr);
                    if (matcher.find()) {
                        final String svrReleaseNr = matcher.group(1);
                        try {
                            try {version = Double.parseDouble(vString);} catch (NumberFormatException e) {version = (float) 0.1;}
                            version = versvn2combinedVersion(version, Integer.parseInt(svrReleaseNr));
                        } catch (NumberFormatException e) {}
                        sb.setConfig("svnRevision", svrReleaseNr);
                    }
                }
                newRev=Integer.parseInt(sb.getConfig("svnRevision", "0"));
            } catch (Exception e) {
                System.err.println("Unable to determine the currently used SVN revision number.");
            }

            sb.setConfig("version", Double.toString(version));
            sb.setConfig("vString", combined2prettyVersion(Double.toString(version)));
            sb.setConfig("vdate", vDATE);
            sb.setConfig("applicationRoot", homePath);
            sb.startupTime = startup;
            serverLog.logConfig("STARTUP", "YACY Version: " + version + ", Built " + vDATE);
            yacyCore.latestVersion = version;

            // read environment
            int timeout       = Integer.parseInt(sb.getConfig("httpdTimeout", "60000"));
            if (timeout < 60000) timeout = 60000;

            // create some directories
            final File htRootPath = new File(homePath, sb.getConfig("htRootPath", "htroot"));
            final File htDocsPath = new File(homePath, sb.getConfig("htDocsPath", "DATA/HTDOCS"));
            //final File htTemplatePath = new File(homePath, sb.getConfig("htTemplatePath","htdocs"));

            // create default notifier picture
            //TODO: Use templates instead of copying images ...
            if (!((new File(htDocsPath, "notifier.gif")).exists())) try {
                serverFileUtils.copy(new File(htRootPath, "env/grafics/empty.gif"),
                                     new File(htDocsPath, "notifier.gif"));
            } catch (IOException e) {}

            if (!(htDocsPath.exists())) htDocsPath.mkdir();
            final File htdocsDefaultReadme = new File(htDocsPath, "readme.txt");
            if (!(htdocsDefaultReadme.exists())) try {serverFileUtils.write((
                    "This is your root directory for individual Web Content\r\n" +
                    "\r\n" +
                    "Please place your html files into the www subdirectory.\r\n" +
                    "The URL of that path is either\r\n" +
                    "http://www.<your-peer-name>.yacy    or\r\n" +
                    "http://<your-ip>:<your-port>/www\r\n" +
                    "\r\n" +
                    "Other subdirectories may be created; they map to corresponding sub-domains.\r\n" +
                    "This directory shares it's content with the applications htroot path, so you\r\n" +
                    "may access your yacy search page with\r\n" +
                    "http://<your-peer-name>.yacy/\r\n" +
                    "\r\n").getBytes(), htdocsDefaultReadme);} catch (IOException e) {
                        System.out.println("Error creating htdocs readme: " + e.getMessage());
                    }

            final File wwwDefaultPath = new File(htDocsPath, "www");
            if (!(wwwDefaultPath.exists())) wwwDefaultPath.mkdir();


            final File shareDefaultPath = new File(htDocsPath, "share");
            if (!(shareDefaultPath.exists())) shareDefaultPath.mkdir();

            migration.migrate(sb, oldRev, newRev);
            
            // start main threads
            final String port = sb.getConfig("port", "8080");
            try {
                final httpd protocolHandler = new httpd(sb, new httpdFileHandler(sb), new httpdProxyHandler(sb));
                final serverCore server = new serverCore(
                        timeout /*control socket timeout in milliseconds*/,
                        true /* block attacks (wrong protocol) */,
                        protocolHandler /*command class*/,
                        sb,
                        30000 /*command max length incl. GET args*/);
                server.setName("httpd:"+port);
                server.setPriority(Thread.MAX_PRIORITY);
                server.setObeyIntermission(false);
                if (server == null) {
                    serverLog.logSevere("STARTUP", "Failed to start server. Probably port " + port + " already in use.");
                } else {
                    // first start the server
                    sb.deployThread("10_httpd", "HTTPD Server/Proxy", "the HTTPD, used as web server and proxy", null, server, 0, 0, 0, 0);
                    //server.start();

                    // open the browser window
                    final boolean browserPopUpTrigger = sb.getConfig("browserPopUpTrigger", "true").equals("true");
                    if (browserPopUpTrigger) {
                        String  browserPopUpPage        = sb.getConfig("browserPopUpPage", "ConfigBasic.html");
                        boolean properPW = (sb.getConfig("adminAccount", "").length() == 0) && (sb.getConfig(httpd.ADMIN_ACCOUNT_B64MD5, "").length() > 0);
                        if (!properPW) browserPopUpPage = "ConfigBasic.html";
                        final String  browserPopUpApplication = sb.getConfig("browserPopUpApplication", "netscape");
                        serverSystem.openBrowser((server.withSSL()?"https":"http") + "://localhost:" + serverCore.getPortNr(port) + "/" + browserPopUpPage, browserPopUpApplication);
                    }

                    //Copy the shipped locales into DATA
                    final File localesPath = new File(homePath, sb.getConfig("localesPath", "DATA/LOCALE"));
                    final File defaultLocalesPath = new File(homePath, "locales");
                    

                    try{
                        final File[] defaultLocales = defaultLocalesPath.listFiles();
                        localesPath.mkdirs();
                        for(int i=0;i < defaultLocales.length; i++){
                            if(defaultLocales[i].getName().endsWith(".lng"))
                                serverFileUtils.copy(defaultLocales[i], new File(localesPath, defaultLocales[i].getName()));
                        }
                        serverLog.logInfo("STARTUP", "Copied the default locales to DATA/LOCALE");
                    }catch(NullPointerException e){
                        serverLog.logSevere("STARTUP", "Nullpointer Exception while copying the default Locales");
                    }

                    //regenerate Locales from Translationlist, if needed
                    final String lang = sb.getConfig("htLocaleSelection", "");
                    if(! lang.equals("") && ! lang.equals("default") ){ //locale is used
                        String currentRev = "";
                        try{
                            final BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(new File( sb.getConfig("htLocalePath", "DATA/HTDOCS/locale"), lang+"/version" ))));
                            currentRev = br.readLine();
                            br.close();
                        }catch(IOException e){
                            //Error
                        }

                        try{ //seperate try, because we want this, even if the file "version" does not exist.
                            if(! currentRev.equals(sb.getConfig("svnRevision", "")) ){ //is this another version?!
                                final File sourceDir = new File(sb.getConfig("htRootPath", "htroot"));
                                final File destDir = new File(sb.getConfig("htLocalePath", "DATA/HTDOCS/locale"), lang);
                                
                              if(translator.translateFilesRecursive(sourceDir, destDir, new File("DATA/LOCALE/"+lang+".lng"), "html,template,inc", "locale")){ //translate it
                                    //write the new Versionnumber
                                    final BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(new File(destDir, "version"))));
                                    bw.write(sb.getConfig("svnRevision", "Error getting Version"));
                                    bw.close();
                                }
                            }
                        }catch(IOException e){
                            //Error
                        }
                    }

                    // registering shutdown hook
                    serverLog.logConfig("STARTUP", "Registering Shutdown Hook");
                    final Runtime run = Runtime.getRuntime();
                    run.addShutdownHook(new shutdownHookThread(Thread.currentThread(), sb));

                    // save information about available memory after all initializations
                    //try {
                        sb.setConfig("memoryFreeAfterInitBGC", Runtime.getRuntime().freeMemory());
                        sb.setConfig("memoryTotalAfterInitBGC", Runtime.getRuntime().totalMemory());
                        System.gc();
                        sb.setConfig("memoryFreeAfterInitAGC", Runtime.getRuntime().freeMemory());
                        sb.setConfig("memoryTotalAfterInitAGC", Runtime.getRuntime().totalMemory());
                    //} catch (ConcurrentModificationException e) {}
                    
                    // wait for server shutdown
                    try {
                        sb.waitForShutdown();
                    } catch (Exception e) {
                        serverLog.logSevere("MAIN CONTROL LOOP", "PANIC: " + e.getMessage(),e);
                    }
                    // shut down
                    serverLog.logConfig("SHUTDOWN", "caught termination signal");
                    server.terminate(false);
                    server.interrupt();
                    if (server.isAlive()) try {
                        URL u = new URL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port));
                        httpc.wget(u, u.getHost(), 1000, null, null, null); // kick server
                        serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket");
                    } catch (IOException ee) {
                        serverLog.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");
                    }

                    // idle until the processes are down
                    while (server.isAlive()) {
                        Thread.sleep(2000); // wait a while
                    }
                    serverLog.logConfig("SHUTDOWN", "server has terminated");
                    sb.close();
                }
            } catch (Exception e) {
                serverLog.logSevere("STARTUP", "Unexpected Error: " + e.getClass().getName(),e);
                //System.exit(1);
            }
        } catch (Exception ee) {
            serverLog.logSevere("STARTUP", "FATAL ERROR: " + ee.getMessage(),ee);
        }
        serverLog.logConfig("SHUTDOWN", "goodbye. (this is the last line)");
        try {
            System.exit(0);
        } catch (Exception e) {} // was once stopped by de.anomic.net.ftpc$sm.checkExit(ftpc.java:1790)
    }

    /**
    * Loads the configuration from the data-folder.
    * FIXME: Why is this called over and over again from every method, instead
    * of setting the configurationdata once for this class in main?
    *
    * @param mes Where are we called from, so that the errormessages can be
    * more descriptive.
    * @param homePath Root-path where all the information is to be found.
    * @return Properties read from the configurationfile.
    */
    private static Properties configuration(String mes, String homePath) {
        serverLog.logConfig(mes, "Application Root Path: " + homePath.toString());

        // read data folder
        File dataFolder = new File(homePath, "DATA");
        if (!(dataFolder.exists())) {
            serverLog.logSevere(mes, "Application was never started or root path wrong.");
            System.exit(-1);
        }

        Properties config = new Properties();
        try {
            config.load(new FileInputStream(new File(homePath, "DATA/SETTINGS/httpProxy.conf")));
        } catch (FileNotFoundException e) {
            serverLog.logSevere(mes, "could not find configuration file.");
            System.exit(-1);
        } catch (IOException e) {
            serverLog.logSevere(mes, "could not read configuration file.");
            System.exit(-1);
        }

        return config;
    }

    static void shutdown() {
        String applicationRoot = System.getProperty("user.dir").replace('\\', '/');
        shutdown(applicationRoot);
    }
    
    /**
    * Call the shutdown-page of YaCy to tell it to shut down. This method is
    * called if you start yacy with the argument -shutdown.
    *
    * @param homePath Root-path where all the information is to be found.
    */
    static void shutdown(String homePath) {
        // start up
        System.out.println(copyright);
        System.out.println(hline);

        Properties config = configuration("REMOTE-SHUTDOWN", homePath);

        // read port
        int port = serverCore.getPortNr(config.getProperty("port", "8080"));

        // read password
        String encodedPassword = (String) config.get(httpd.ADMIN_ACCOUNT_B64MD5);
        if (encodedPassword == null) encodedPassword = ""; // not defined

        // send 'wget' to web interface
        httpHeader requestHeader = new httpHeader();
        requestHeader.put("Authorization", "realm=" + encodedPassword); // for http-authentify
        try {
            httpc con = httpc.getInstance("localhost", "localhost", port, 10000, false);
            httpc.response res = con.GET("Steering.html?shutdown=", requestHeader);

            // read response
            if (res.status.startsWith("2")) {
                serverLog.logConfig("REMOTE-SHUTDOWN", "YACY accepted shutdown command.");
                serverLog.logConfig("REMOTE-SHUTDOWN", "Stand by for termination, which may last some seconds.");
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                res.writeContent(bos, null);
                con.close();
            } else {
                serverLog.logSevere("REMOTE-SHUTDOWN", "error response from YACY socket: " + res.status);
                System.exit(-1);
            }
        } catch (IOException e) {
            serverLog.logSevere("REMOTE-SHUTDOWN", "could not establish connection to YACY socket: " + e.getMessage());
            System.exit(-1);
        }

        // finished
        serverLog.logConfig("REMOTE-SHUTDOWN", "SUCCESSFULLY FINISHED remote-shutdown:");
        serverLog.logConfig("REMOTE-SHUTDOWN", "YACY will terminate after working off all enqueued tasks.");
    }

    /**
    * This method gets all found words and outputs a statistic about the score
    * of the words. The output of this method can be used to create stop-word
    * lists. This method will be called if you start yacy with the argument
    * -genwordstat.
    * FIXME: How can stop-word list be created from this output? What type of
    * score is output?
    *
    * @param homePath Root-Path where all the information is to be found.
    */
    private static void genWordstat(String homePath) {
        // start up
        System.out.println(copyright);
        System.out.println(hline);

        Properties config = configuration("GEN-WORDSTAT", homePath);

        // load words
        serverLog.logInfo("GEN-WORDSTAT", "loading words...");
        HashMap words = loadWordMap(new File(homePath, "yacy.words"));

        // find all hashes
        serverLog.logInfo("GEN-WORDSTAT", "searching all word-hash databases...");
        File dbRoot = new File(homePath, config.getProperty("dbPath"));
        enumerateFiles ef = new enumerateFiles(new File(dbRoot, "WORDS"), true, false, true, true);
        File f;
        String h;
        kelondroMScoreCluster hs = new kelondroMScoreCluster();
        while (ef.hasMoreElements()) {
            f = (File) ef.nextElement();
            h = f.getName().substring(0, yacySeedDB.commonHashLength);
            hs.addScore(h, (int) f.length());
        }

        // list the hashes in reverse order
        serverLog.logInfo("GEN-WORDSTAT", "listing words in reverse size order...");
        String w;
        Iterator i = hs.scores(false);
        while (i.hasNext()) {
            h = (String) i.next();
            w = (String) words.get(h);
            if (w == null) System.out.print("# " + h); else System.out.print(w);
            System.out.println(" - " + hs.getScore(h));
        }

        // finished
        serverLog.logConfig("GEN-WORDSTAT", "FINISHED");
    }

    /**
    * Migrates the PLASMA WORDS structure to the assortment cache if possible.
    * This method will be called if you start yacy with the argument
    * -migratewords.
    * Caution: This might take a long time to finish. Don't interrupt it!
    * FIXME: Shouldn't this method be private?
    *
    * @param homePath Root-path where all the information is to be found.
    */
    public static void migrateWords(String homePath) {
        // run with "java -classpath classes yacy -migratewords"
        try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
        File dbroot = new File(new File(homePath), "DATA/PLASMADB");
        File indexRoot = new File(new File(homePath), "DATA/INDEX");
        serverLog log = new serverLog("WORDMIGRATION");
        log.logInfo("STARTING MIGRATION");
        plasmaWordIndex wordIndexCache = new plasmaWordIndex(indexRoot, 60000000, 60000000, 10000, log);
        enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
        String wordhash;
        File wordfile;
        int migrationCount;
        while (words.hasMoreElements())
            try {
                wordfile = (File) words.nextElement();
                wordhash = wordfile.getName().substring(0, 12);
                // System.out.println("NOW: " + wordhash);
                migrationCount = migrateWords2index(dbroot, wordhash, wordIndexCache);
                if (migrationCount >= 0) {
                    if (migrationCount == 0)
                        log.logInfo("SKIPPED  " + wordhash + ": empty");
                    else if (migrationCount > 0)
                        log.logInfo("MIGRATED " + wordhash + ": " + migrationCount + " entries");
                    else
                        log.logInfo("REVERSED " + wordhash + ": " + (-migrationCount) + " entries");
                } else {
                    log.logInfo("SKIPPED  " + wordhash);
                }
            } catch (Exception e) {
                log.logSevere("Exception", e);
            }
        log.logInfo("FINISHED MIGRATION JOB, WAIT FOR DUMP");
        wordIndexCache.close();
        log.logInfo("TERMINATED MIGRATION");
    }    
    

    public static int migrateWords2index(File oldDatabaseRoot, String wordhash, plasmaWordIndex wi) throws IOException {
        // returns the number of entries that had been added to the assortments
        // can be negative if some assortments have been moved to the backend
        File db = plasmaWordIndexFile.wordHash2path(oldDatabaseRoot, wordhash);
        if (!(db.exists())) {
            serverLog.logSevere("migrateWordIndex", "word index file for hash " + wordhash + " not found");
            return -1;
        }
        plasmaWordIndexFile entity = null;
        try {
            entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash);
            int size = entity.size();
            indexContainer container = new indexContainer(wordhash, indexRWIEntryNew.urlEntryRow);

            try {
                Iterator entries = entity.elements(true);
                indexRWIEntry entry;
                while (entries.hasNext()) {
                    entry = (indexRWIEntry) entries.next();
                    // System.out.println("ENTRY = " + entry.getUrlHash());
                    container.add(entry, System.currentTimeMillis());
                }
                // we have read all elements, now delete the entity
                entity.deleteComplete();
                entity.close();
                entity = null;

                wi.addEntries(container, container.updated(), false);
                return size;
            } catch (kelondroException e) {
                // database corrupted, we simply give up the database and delete it
                try { entity.close(); } catch (Exception ee) { }
                entity = null;
                try { db.delete(); } catch (Exception ee) { }
                serverLog.logSevere("migrateWordIndex", "database for hash " + wordhash + " corrupted; deleted");
                return -1;
            }
        } finally {
            if (entity != null) try {entity.close();}catch(Exception e){}
        }
    }
    
    public static void migrateAssortments(String homePath) {
        // run with "java -classpath classes yacy -migrateassortments"
        try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
        serverLog log = new serverLog("ASSORTMENTMIGRATION");
        File aclusterroot = new File(new File(homePath), "DATA/PLASMADB/ACLUSTER");
        File indexRoot = new File(new File(homePath), "DATA/INDEX");
        plasmaWordIndex wordIndexCache = new plasmaWordIndex(indexRoot, 60000000, 60000000, 10000, log);
        log.logInfo("STARTING MIGRATION");
        String[] a = aclusterroot.list();
        AssortmentImporter importer = new AssortmentImporter(wordIndexCache);
        for (int i = a.length - 1; i >= 0; i--) {
            if (a[i].startsWith("indexAssortment")) {
                importer.init(new File(aclusterroot, a[i]), 16000000, 2000);
                importer.run();
            }
        }
        log.logInfo("FINISHED MIGRATION JOB, WAIT FOR DUMP");
        wordIndexCache.close();
        log.logInfo("TERMINATED MIGRATION");
    }    
    
    /**
     * @param homePath path to the YaCy directory
     * @param dbcache cache size in MB
     */
    public static void minimizeUrlDB(String homePath, int dbcache) {
        // run with "java -classpath classes yacy -minimizeUrlDB"
        try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
        File indexRoot = new File(new File(homePath), "DATA/INDEX");
        File indexRoot2 = new File(new File(homePath), "DATA/INDEX2");
        serverLog log = new serverLog("URL-CLEANUP");
        try {
            log.logInfo("STARTING URL CLEANUP");
            
            // db containing all currently loades urls
            int cache = dbcache * 1024; // in KB
            log.logFine("URLDB-Caches: "+cache+" bytes");
            plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(indexRoot, cache, 10000);
            
            // db used to hold all neede urls
            plasmaCrawlLURL minimizedUrlDB = new plasmaCrawlLURL(indexRoot2, cache, 10000);
            
            Runtime rt = Runtime.getRuntime();
            int cacheMem = (int)(serverMemory.max-rt.totalMemory());
            if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
                
            plasmaWordIndex wordIndex = new plasmaWordIndex(indexRoot, cacheMem, cacheMem, 10000, log);
            Iterator indexContainerIterator = wordIndex.wordContainers("AAAAAAAAAAAA", false, false);
            
            long urlCounter = 0, wordCounter = 0;
            long wordChunkStart = System.currentTimeMillis(), wordChunkEnd = 0;
            String wordChunkStartHash = "AAAAAAAAAAAA", wordChunkEndHash;
            
            while (indexContainerIterator.hasNext()) {
                indexContainer wordIdxContainer = null;
                try {
                    wordCounter++;
                    wordIdxContainer  = (indexContainer) indexContainerIterator.next();
                    
                    // the combined container will fit, read the container
                    Iterator wordIdxEntries = wordIdxContainer.entries();
                    indexRWIEntry iEntry;
                    while (wordIdxEntries.hasNext()) {
                        iEntry = (indexRWIEntry) wordIdxEntries.next();
                        String urlHash = iEntry.urlHash();                    
                        if ((currentUrlDB.exists(urlHash)) && (!minimizedUrlDB.exists(urlHash))) try {
                            indexURLEntry urlEntry = currentUrlDB.load(urlHash, null);                       
                            urlCounter++;
                            minimizedUrlDB.store(urlEntry);
                            if (urlCounter % 500 == 0) {
                                log.logInfo(urlCounter + " URLs found so far.");
                            }
                        } catch (IOException e) {}
                    }
                    
                    if (wordCounter%500 == 0) {
                        wordChunkEndHash = wordIdxContainer.getWordHash();
                        wordChunkEnd = System.currentTimeMillis();
                        long duration = wordChunkEnd - wordChunkStart;
                        log.logInfo(wordCounter + " words scanned " +
                                "[" + wordChunkStartHash + " .. " + wordChunkEndHash + "]\n" + 
                                "Duration: "+ 500*1000/duration + " words/s" +
                                " | Free memory: " + rt.freeMemory() + 
                                " | Total memory: " + rt.totalMemory());
                        wordChunkStart = wordChunkEnd;
                        wordChunkStartHash = wordChunkEndHash;
                    }
                    
                    // we have read all elements, now we can close it
                    wordIdxContainer = null;
                    
                } catch (Exception e) {
                    log.logSevere("Exception", e);
                } finally {
                    if (wordIdxContainer != null) try { wordIdxContainer = null; } catch (Exception e) {}
                }
            }
            log.logInfo("current LURL DB contains " + currentUrlDB.size() + " entries.");
            log.logInfo("mimimized LURL DB contains " + minimizedUrlDB.size() + " entries.");
            
            currentUrlDB.close();
            minimizedUrlDB.close();
            wordIndex.close();
            
            // TODO: rename the mimimized UrlDB to the name of the previous UrlDB            
            
            log.logInfo("FINISHED URL CLEANUP, WAIT FOR DUMP");
            log.logInfo("You can now backup your old URL DB and rename minimized/urlHash.db to urlHash.db");
            
            log.logInfo("TERMINATED URL CLEANUP");
        } catch (Exception e) {
            log.logSevere("Exception: " + e.getMessage(), e);
        } catch (Error e) {
            log.logSevere("Error: " + e.getMessage(), e);
        }
    }

    /**
    * Reads all words from the given file and creates a hashmap, where key is
    * the plasma word hash and value is the word itself.
    *
    * @param wordlist File where the words are stored.
    * @return HashMap with the hash-word - relation.
    */
    private static HashMap loadWordMap(File wordlist) {
        // returns a hash-word - Relation
        HashMap wordmap = new HashMap();
        try {
            String word;
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist)));
            while ((word = br.readLine()) != null) wordmap.put(plasmaCondenser.word2hash(word),word);
            br.close();
        } catch (IOException e) {}
        return wordmap;
    }

    /**
    * Reads all words from the given file and creats as HashSet, which contains
    * all found words.
    *
    * @param wordlist File where the words are stored.
    * @return HashSet with the words
    */
    private static HashSet loadWordSet(File wordlist) {
        // returns a set of words
        HashSet wordset = new HashSet();
        try {
            String word;
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist)));
            while ((word = br.readLine()) != null) wordset.add(word);
            br.close();
        } catch (IOException e) {}
        return wordset;
    }

    /**
    * Cleans a wordlist in a file according to the length of the words. The
    * file with the given filename is read and then only the words in the given
    * length-range are written back to the file.
    *
    * @param wordlist Name of the file the words are stored in.
    * @param minlength Minimal needed length for each word to be stored.
    * @param maxlength Maximal allowed length for each word to be stored.
    */
    private static void cleanwordlist(String wordlist, int minlength, int maxlength) {
        // start up
        System.out.println(copyright);
        System.out.println(hline);
        serverLog.logConfig("CLEAN-WORDLIST", "START");

        String word;
        TreeSet wordset = new TreeSet();
        int count = 0;
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(wordlist)));
            String seps = "' .,:/-&";
            while ((word = br.readLine()) != null) {
                word = word.toLowerCase().trim();
                for (int i = 0; i < seps.length(); i++) {
                    if (word.indexOf(seps.charAt(i)) >= 0) word = word.substring(0, word.indexOf(seps.charAt(i)));
                }
                if ((word.length() >= minlength) && (word.length() <= maxlength)) wordset.add(word);
                count++;
            }
            br.close();

            if (wordset.size() != count) {
                count = count - wordset.size();
                BufferedWriter bw = new BufferedWriter(new PrintWriter(new FileWriter(wordlist)));
                while (wordset.size() > 0) {
                    word = (String) wordset.first();
                    bw.write(word + "\n");
                    wordset.remove(word);
                }
                bw.close();
                serverLog.logInfo("CLEAN-WORDLIST", "shrinked wordlist by " + count + " words.");
            } else {
                serverLog.logInfo("CLEAN-WORDLIST", "not necessary to change wordlist");
            }
        } catch (IOException e) {
            serverLog.logSevere("CLEAN-WORDLIST", "ERROR: " + e.getMessage());
            System.exit(-1);
        }

        // finished
        serverLog.logConfig("CLEAN-WORDLIST", "FINISHED");
    }

    /**
    * Gets all words from the stopword-list and removes them in the databases.
    * FIXME: Really? Don't know if I read this correctly.
    *
    * @param homePath Root-Path where all information is to be found.
    */
    private static void deleteStopwords(String homePath) {
        // start up
        System.out.println(copyright);
        System.out.println(hline);
        serverLog.logConfig("DELETE-STOPWORDS", "START");

        Properties config = configuration("DELETE-STOPWORDS", homePath);
        File dbRoot = new File(homePath, config.getProperty("dbPath"));

        // load stopwords
        HashSet stopwords = loadWordSet(new File(homePath, "yacy.stopwords"));
        serverLog.logInfo("DELETE-STOPWORDS", "loaded stopwords, " + stopwords.size() + " entries in list, starting scanning");

        // find all hashes
        File f;
        String w;
        int count = 0;
        long thisamount, totalamount = 0;
        Iterator i = stopwords.iterator();
        while (i.hasNext()) {
            w = (String) i.next();
            f = plasmaWordIndexFile.wordHash2path(dbRoot, plasmaCondenser.word2hash(w));
            if (f.exists()) {
                thisamount = f.length();
                if (f.delete()) {
                    count++;
                    totalamount += thisamount;
                    serverLog.logInfo("DELETE-STOPWORDS", "deleted index for word '" + w + "', " + thisamount + " bytes");
                }
            }
        }

        serverLog.logInfo("DELETE-STOPWORDS", "TOTALS: deleted " + count + " indexes; " + (totalamount / 1024) + " kbytes");

        // finished
        serverLog.logConfig("DELETE-STOPWORDS", "FINISHED");
    }

    private static void transferCR(String targetaddress, String crfile) {
        File f = new File(crfile);
        try {
            byte[] b = serverFileUtils.read(f);
            String result = yacyClient.transfer(targetaddress, f.getName(), b);
            if (result == null)
                serverLog.logInfo("TRANSFER-CR", "transmitted file " + crfile + " to " + targetaddress + " successfully");
            else
                serverLog.logInfo("TRANSFER-CR", "error transmitting file " + crfile + " to " + targetaddress + ": " + result);
        } catch (IOException e) {
            serverLog.logInfo("TRANSFER-CR", "could not read file " + crfile);
        }
    }
    /**
    * Generates a text file containing all domains in this peer's DB.
    * This may be useful to calculate the YaCy-Blockrank.
    *
    * @param format String which determines the format of the file. Possible values: "html", "zip", "gzip" or "plain"
    * @see urllist
    */
    private static void domlist(String homePath, String source, String format, String targetName) {
    	
    	File root = new File(homePath);
        try {
            final plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
            HashMap doms = new HashMap();
            System.out.println("Started domain list extraction from " + sb.wordIndex.loadedURL.size() + " url entries.");
            System.out.println("a dump will be written after double-check of all extracted domains.");
            System.out.println("This process may fail in case of too less memory. To increase memory, start with");
            System.out.println("java -Xmx<megabytes>m -classpath classes yacy -domlist [ -source { nurl | lurl | eurl } ] [ -format { text  | zip | gzip | html } ] [ <path to DATA folder> ]");
            int c = 0;
            long start = System.currentTimeMillis();
            if (source.equals("lurl")) {
                Iterator eiter = sb.wordIndex.loadedURL.entries(true, false, null);
                indexURLEntry entry;
                while (eiter.hasNext()) {
                    try {
                        entry = (indexURLEntry) eiter.next();
                        indexURLEntry.Components comp = entry.comp();
                        if ((entry != null) && (comp.url() != null)) doms.put(comp.url().getHost(), null);
                    } catch (Exception e) {
                        // here a MalformedURLException may occur
                        // just ignore
                    }
                    c++;
                    if (c % 10000 == 0) System.out.println(
                            c + " urls checked, " +
                            doms.size() + " domains collected, " +
                            ((Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory() + Runtime.getRuntime().freeMemory()) / 1024 / 1024) + " MB available, " + 
                            ((System.currentTimeMillis() - start) * (sb.wordIndex.loadedURL.size() - c) / c / 60000) + " minutes remaining.");
                }
            }
            if (source.equals("eurl")) {
                Iterator eiter = sb.errorURL.entries(true, false, null);
                plasmaCrawlEURL.Entry entry;
                while (eiter.hasNext()) {
                    try {
                        entry = (plasmaCrawlEURL.Entry) eiter.next();
                        if ((entry != null) && (entry.url() != null)) doms.put(entry.url().getHost(), entry.failreason());
                    } catch (Exception e) {
                        // here a MalformedURLException may occur
                        // just ignore
                    }
                    c++;
                    if (c % 10000 == 0) System.out.println(
                            c + " urls checked, " +
                            doms.size() + " domains collected, " +
                            ((Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory() + Runtime.getRuntime().freeMemory()) / 1024 / 1024) + " MB available, " + 
                            ((System.currentTimeMillis() - start) * (sb.wordIndex.loadedURL.size() - c) / c / 60000) + " minutes remaining.");
                }
            }
            if (source.equals("nurl")) {
                Iterator eiter = sb.noticeURL.entries(true, false, null);
                plasmaCrawlNURL.Entry entry;
                while (eiter.hasNext()) {
                    try {
                        entry = (plasmaCrawlNURL.Entry) eiter.next();
                        if ((entry != null) && (entry.url() != null)) doms.put(entry.url().getHost(), "profile=" + entry.profileHandle() + ", depth=" + entry.depth());
                    } catch (Exception e) {
                        // here a MalformedURLException may occur
                        // just ignore
                    }
                    c++;
                    if (c % 10000 == 0) System.out.println(
                            c + " urls checked, " +
                            doms.size() + " domains collected, " +
                            ((Runtime.getRuntime().maxMemory() - Runtime.getRuntime().totalMemory() + Runtime.getRuntime().freeMemory()) / 1024 / 1024) + " MB available, " + 
                            ((System.currentTimeMillis() - start) * (sb.wordIndex.loadedURL.size() - c) / c / 60000) + " minutes remaining.");
                }
            }
            
            if (format.equals("html")) {
                // output file in HTML format
                File file = new File(root, targetName + ".html");
                BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
                System.out.println("Started domain list dump to file " + file);
                Iterator i = doms.entrySet().iterator();
                Map.Entry entry;
                String key;
                bos.write(("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">").getBytes());
                bos.write(serverCore.crlf);
                bos.write(("<html><head><title>YaCy " + source + " domainlist</title></head><body>").getBytes());
                bos.write(serverCore.crlf);
                while (i.hasNext()) {
                    entry = (Map.Entry) i.next();
                    key = (String) entry.getKey();
                    bos.write(("<a href=\"http://" + key + "\">" + key + "</a>" +
                              ((entry.getValue() == null) ? "" : (" " + ((String) entry.getValue()))) + "<br>"
                             ).getBytes());
                    bos.write(serverCore.crlf);
                }
                bos.write(("</body></html>").getBytes());
                bos.close();
            
            } else if (format.equals("zip")) {
                // output file in plain text but compressed with ZIP
                File file = new File(root, targetName + ".zip");
                System.out.println("Started domain list dump to file " + file);
                serverFileUtils.saveSet(file, "zip", doms.keySet(), new String(serverCore.crlf));
            
            } else if (format.equals("gzip")) {
                // output file in plain text but compressed with GZIP
                File file = new File(root, targetName + ".txt.gz");
                System.out.println("Started domain list dump to file " + file);
                serverFileUtils.saveSet(file, "gzip", doms.keySet(), new String(serverCore.crlf));
            } else {
                // plain text list
                File file = new File(root, targetName + ".txt");
                System.out.println("Started domain list dump to file " + file);
                serverFileUtils.saveSet(file, "plain", doms.keySet(), new String(serverCore.crlf));
            }
            sb.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    
    private static void urllist(String homePath, String source, boolean html, String targetName) {
        File root = new File(homePath);
        try {
            final plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
            File file = new File(root, targetName);
            BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
            
            if (source.equals("lurl")) {
                Iterator eiter = sb.wordIndex.loadedURL.entries(true, false, null);
                indexURLEntry entry;
                while (eiter.hasNext()) {
                    entry = (indexURLEntry) eiter.next();
                    indexURLEntry.Components comp = entry.comp();
                    if ((entry != null) && (comp.url() != null)) {
                        if (html) {
                            bos.write(("<a href=\"" + comp.url().toNormalform() + "\">" + comp.descr() + "</a><br>").getBytes("UTF-8"));
                            bos.write(serverCore.crlf);
                        } else {
                            bos.write(comp.url().toNormalform().getBytes());
                            bos.write(serverCore.crlf);
                        }
                    }
                }
            }
            if (source.equals("eurl")) {
                Iterator eiter = sb.errorURL.entries(true, false, null);
                plasmaCrawlEURL.Entry entry;
                while (eiter.hasNext()) {
                    entry = (plasmaCrawlEURL.Entry) eiter.next();
                    if ((entry != null) && (entry.url() != null)) {
                        if (html) {
                            bos.write(("<a href=\"" + entry.url() + "\">" + entry.url() + "</a> " + entry.failreason() + "<br>").getBytes("UTF-8"));
                            bos.write(serverCore.crlf);
                        } else {
                            bos.write(entry.url().toString().getBytes());
                            bos.write(serverCore.crlf);
                        }
                    }
                }
            }
            if (source.equals("nurl")) {
                Iterator eiter = sb.noticeURL.entries(true, false, null);
                plasmaCrawlNURL.Entry entry;
                while (eiter.hasNext()) {
                    entry = (plasmaCrawlNURL.Entry) eiter.next();
                    if ((entry != null) && (entry.url() != null)) {
                        if (html) {
                            bos.write(("<a href=\"" + entry.url() + "\">" + entry.url() + "</a> " + "profile=" + entry.profileHandle() + ", depth=" + entry.depth() + "<br>").getBytes("UTF-8"));
                            bos.write(serverCore.crlf);
                        } else {
                            bos.write(entry.url().toString().getBytes());
                            bos.write(serverCore.crlf);
                        }
                    }
                }
            }
            bos.close();
            sb.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    
    private static void migratelurls(String homePath, File urlHash) {
        final plasmaSwitchboard sb = new plasmaSwitchboard(homePath, "yacy.init", "DATA/SETTINGS/httpProxy.conf");
        kelondroTree oldindex = null;
        try {
            oldindex = new kelondroTree(urlHash, 1000, -1, indexURLEntryOld.rowdef);
        } catch (IOException e) {
            System.out.println("ERROR: CANNOT OPEN OLD INDEX: " + e.getMessage());
        }
            
        long start = System.currentTimeMillis();
        long last = start;
        int tc = oldindex.size(), c = 0;
        Iterator eiter = oldindex.contentRows(-1);
        kelondroRow.Entry oldrow;
        indexURLEntry oldentry;
        indexURLEntry newentry;
        indexURLEntry.Components comp;
        byte[] dummymd5 = new byte[0];
        while (eiter.hasNext()) {
            try {
                oldrow = (kelondroRow.Entry) eiter.next();
            } catch (Exception e) {
                // an IOException may occur here
                //e.printStackTrace();
                oldrow = null;
            }
            if (oldrow != null) try {
                oldentry = new indexURLEntryOld(oldrow, null);
                comp = oldentry.comp();
                newentry = sb.wordIndex.loadedURL.newEntry(
                            comp.url(), 
                            comp.descr(), 
                            "", 
                            "", 
                            "", 
                            oldentry.moddate(), 
                            oldentry.loaddate(),
                            oldentry.freshdate(), 
                            oldentry.referrerHash(), 
                            dummymd5, 
                            oldentry.size(), 
                            oldentry.wordCount(), 
                            oldentry.doctype(), 
                            new kelondroBitfield(4), 
                            oldentry.language(), 
                            0, 0, 0, 0, 0, 0);
                sb.wordIndex.loadedURL.store(newentry);
                c++;
            } catch (IOException e) {
                // ignore
            }
            if (System.currentTimeMillis() - last > 60000) {
                System.out.println("Migrated " + c + " from " + tc + " urls. Estimated remaining time: " + ((System.currentTimeMillis() - start) * (tc - c)  / c / 60000) + " minutes");
                last = System.currentTimeMillis();
            }
        }
        sb.close();
        try { oldindex.close(); } catch (IOException e) { }
        System.out.println("MIGRATION OF " + c + " URLs FINISHED");
    }
    
    private static String[] shift(String[] args, int pos, int count) {
        String[] newargs = new String[args.length - count];
        System.arraycopy(args, 0, newargs, 0, pos);
        System.arraycopy(args, pos + count, newargs, pos, args.length - pos - count);
        return newargs;
    }
    
    /**
     * Uses an Iteration over urlHash.db to detect malformed URL-Entries.
     * Damaged URL-Entries will be marked in a HashSet and removed at the end of the function.
     *
     * @param homePath Root-Path where all information is to be found.
     */
    private static void urldbcleanup(String homePath) {
        File root = new File(homePath);
        File indexroot = new File(root, "DATA/INDEX");
        serverLog log = new serverLog("URLDBCLEANUP");
        try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
        try {
            plasmaCrawlLURL currentUrlDB = new plasmaCrawlLURL(indexroot, 4194304, 10000);
            currentUrlDB.urldbcleanup();
            currentUrlDB.close();
        } catch (IOException e) {
            log.logSevere("IOException", e);
        }
    }
    
    private static void RWIHashList(String homePath, String targetName, String resource, String format) {
        plasmaWordIndex WordIndex = null;
        serverLog log = new serverLog("HASHLIST");
        File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
        File indexRoot = new File(new File(homePath), "DATA/INDEX");
        String wordChunkStartHash = "AAAAAAAAAAAA";
        try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
        log.logInfo("STARTING CREATION OF RWI-HASHLIST");
        File root = new File(homePath);
        try {
            Iterator indexContainerIterator = null;
            if (resource.equals("all")) {
                WordIndex = new plasmaWordIndex(indexRoot, 8*1024*1024, 8*1024*1024, 3000, log);
                indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, false, false);
            } else if (resource.startsWith("assortment")) {
                int a = Integer.parseInt(resource.substring(10));
                plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, 3000, null);
                indexContainerIterator = assortment.wordContainers();
            } else if (resource.equals("words")) {
                plasmaWordIndexFileCluster fileDB = new plasmaWordIndexFileCluster(homeDBroot);
                indexContainerIterator = fileDB.wordContainers(wordChunkStartHash, false);
            }
            int counter = 0;
            indexContainer container = null;
            if (format.equals("zip")) {
                log.logInfo("Writing Hashlist to ZIP-file: " + targetName + ".zip");
                ZipEntry zipEntry = new ZipEntry(targetName + ".txt");
                File file = new File(root, targetName + ".zip");
                ZipOutputStream bos = new ZipOutputStream(new FileOutputStream(file));
                bos.putNextEntry(zipEntry);
                while (indexContainerIterator.hasNext()) {
                    counter++;
                    container = (indexContainer) indexContainerIterator.next();
                    bos.write((container.getWordHash()).getBytes());
                    bos.write(serverCore.crlf);
                    if (counter % 500 == 0) {
                        log.logInfo("Found " + counter + " Hashs until now. Last found Hash: " + container.getWordHash());
                    }
                }
                bos.flush();
                bos.close();
            } else {
                log.logInfo("Writing Hashlist to TXT-file: " + targetName + ".txt");
                File file = new File(root, targetName + ".txt");
                BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file));
                while (indexContainerIterator.hasNext()) {
                    counter++;
                    container = (indexContainer) indexContainerIterator.next();
                    bos.write((container.getWordHash()).getBytes());
                    bos.write(serverCore.crlf);
                    if (counter % 500 == 0) {
                        log.logInfo("Found " + counter + " Hashs until now. Last found Hash: " + container.getWordHash());
                    }
                }
                bos.flush();
                bos.close();
            }
            log.logInfo("Total number of Hashs: " + counter + ". Last found Hash: " + container.getWordHash());
        } catch (IOException e) {
            log.logSevere("IOException", e);
        }
        if (WordIndex != null) {
            WordIndex.close();
            WordIndex = null;
        }
    }
    
    /**
     * Searching for peers affected by Bug documented in <a href="http://www.yacy-forum.de/viewtopic.php?p=16056#16056">YaCy-Forum Posting 16056</a>
     * @param homePath
     * @see <a href="http://www.yacy-forum.de/viewtopic.php?p=16056#16056">YaCy-Forum Posting 16056</a>
     */
    public static void testPeerDB(String homePath) {
        
        try {
            File yacyDBPath = new File(new File(homePath), "DATA/YACYDB");
            
            String[] dbFileNames = {"seed.new.db","seed.old.db","seed.pot.db"};
            for (int i=0; i < dbFileNames.length; i++) {
                File dbFile = new File(yacyDBPath,dbFileNames[i]);
                kelondroMapObjects db = new kelondroMapObjects(new kelondroDyn(dbFile, (1024 * 0x400) / 3, 3000, yacySeedDB.commonHashLength, 480, '#', true, false), 500, yacySeedDB.sortFields, yacySeedDB.longaccFields, yacySeedDB.doubleaccFields, null, null);
                
                kelondroMapObjects.mapIterator it;
                it = db.maps(true, false);
                while (it.hasNext()) {
                    Map dna = (Map) it.next();
                    String peerHash = (String) dna.get("key");
                    if (peerHash.length() < yacySeedDB.commonHashLength) {
                        String peerName = (String) dna.get("Name");
                        String peerIP = (String) dna.get("IP");
                        String peerPort = (String) dna.get("Port");
                        
                        while (peerHash.length() < yacySeedDB.commonHashLength) { peerHash = peerHash + "_"; }                        
                        System.err.println("Invalid Peer-Hash found in '" + dbFileNames[i] + "': " + peerName + ":" +  peerHash + ", http://" + peerIP + ":" + peerPort);
                    }
                }
                db.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

 
    /**
     * Main-method which is started by java. Checks for special arguments or
     * starts up the application.
     * 
     * @param args
     *            Given arguments from the command line.
     */
    public static void main(String args[]) {

        // check assertion status
        //ClassLoader.getSystemClassLoader().setDefaultAssertionStatus(true);
        boolean assertionenabled = false;
        assert assertionenabled = true;
        if (assertionenabled) System.out.println("Asserts are enabled");
        
        // check memory amount
        System.gc();
        long startupMemFree  = Runtime.getRuntime().freeMemory(); // the amount of free memory in the Java Virtual Machine
        long startupMemTotal = Runtime.getRuntime().totalMemory(); // the total amount of memory in the Java virtual machine; may vary over time
        serverMemory.available(); // force initialization of class serverMemory
        
        // go into headless awt mode
        System.setProperty("java.awt.headless", "true");
        //which XML Parser?
//        if(System.getProperty("javax.xml.parsers.DocumentBuilderFactory")==null){
//            System.setProperty("javax.xml.parsers.DocumentBuilderFactory", "org.apache.crimson.jaxp.DocumentBuilderFactoryImpl");
//        }
//        if(System.getProperty("javax.xml.parsers.SAXParserFactory")==null){
//            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.crimson.jaxp.SAXParserFactoryImpl");
//        }
        
        String applicationRoot = System.getProperty("user.dir").replace('\\', '/');
        //System.out.println("args.length=" + args.length);
        //System.out.print("args=["); for (int i = 0; i < args.length; i++) System.out.print(args[i] + ", "); System.out.println("]");
        if ((args.length >= 1) && ((args[0].toLowerCase().equals("-startup")) || (args[0].equals("-start")))) {
            // normal start-up of yacy
            if (args.length == 2) applicationRoot= args[1];
            startup(applicationRoot, startupMemFree, startupMemTotal);
        } else if ((args.length >= 1) && ((args[0].toLowerCase().equals("-shutdown")) || (args[0].equals("-stop")))) {
            // normal shutdown of yacy
            if (args.length == 2) applicationRoot= args[1];
            shutdown(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-migratewords"))) {
            // migrate words from DATA/PLASMADB/WORDS path to collection index
            // attention: this may run long and should not be interrupted!
            if (args.length == 2) applicationRoot= args[1];
            migrateWords(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-migrateassortments"))) {
            // migrate assortments from DATA/PLASMADB/ACLUSTER path to collection index
            // attention: this may run long and should not be interrupted!
            if (args.length == 2) applicationRoot= args[1];
            migrateAssortments(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-minimizeurldb"))) {
            // migrate words from DATA/PLASMADB/WORDS path to assortment cache, if possible
            // attention: this may run long and should not be interrupted!
            int dbcache = 4;
            if (args.length >= 3 && args[1].toLowerCase().equals("-cache")) {
                dbcache = Integer.parseInt(args[2]);
                args = shift(args, 1, 2);
            }
            if (args.length == 2) applicationRoot= args[1];
            minimizeUrlDB(applicationRoot, dbcache);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-testpeerdb"))) {
            if (args.length == 2) {
                applicationRoot= args[1];
            } else if (args.length > 2) {
                System.err.println("Usage: -testPeerDB [homeDbRoot]");
            }
            testPeerDB(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-deletestopwords"))) {
            // delete those words in the index that are listed in the stopwords file
            if (args.length == 2) applicationRoot= args[1];
            deleteStopwords(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-genwordstat"))) {
            // this can help to create a stop-word list
            // to use this, you need a 'yacy.words' file in the root path
            // start this with "java -classpath classes yacy -genwordstat [<rootdir>]"
            if (args.length == 2) applicationRoot= args[1];
            genWordstat(applicationRoot);
        } else if ((args.length == 4) && (args[0].toLowerCase().equals("-cleanwordlist"))) {
            // this can be used to organize and clean a word-list
            // start this with "java -classpath classes yacy -cleanwordlist <word-file> <minlength> <maxlength>"
            int minlength = Integer.parseInt(args[2]);
            int maxlength = Integer.parseInt(args[3]);
            cleanwordlist(args[1], minlength, maxlength);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-transfercr"))) {
            // transfer a single cr file to a remote peer
            String targetaddress = args[1];
            String crfile = args[2];
            transferCR(targetaddress, crfile);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-domlist"))) {
            // generate a url list and save it in a file
            String source = "lurl";
            if (args.length >= 3 && args[1].toLowerCase().equals("-source")) {
                if ((args[2].equals("nurl")) ||
                    (args[2].equals("lurl")) ||
                    (args[2].equals("eurl")))
                    source = args[2];
                args = shift(args, 1, 2);
            }
            String format = "txt";
            if (args.length >= 3 && args[1].toLowerCase().equals("-format")) {
                if ((args[2].equals("html")) ||
                    (args[2].equals("zip")) ||
                    (args[2].equals("gzip")))
                    format = args[2];
                args = shift(args, 1, 2);
            }
            if (args.length == 2) applicationRoot= args[1];
            String outfile = "domlist_" + source + "_" + System.currentTimeMillis();
            domlist(applicationRoot, source, format, outfile);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urllist"))) {
            // generate a url list and save it in a file
            String source = "lurl";
            if (args.length >= 3 && args[1].toLowerCase().equals("-source")) {
                if ((args[2].equals("nurl")) ||
                    (args[2].equals("lurl")) ||
                    (args[2].equals("eurl")))
                    source = args[2];
                args = shift(args, 1, 2);
            }
            boolean html = false;
            if (args.length >= 3 && args[1].toLowerCase().equals("-format")) {
                if (args[2].equals("html")) html = true;
                args = shift(args, 1, 2);
            }
            if (args.length == 2) applicationRoot= args[1];
            String outfile = "urllist_" + source + "_" + System.currentTimeMillis() + ((html) ? ".html" : ".txt");
            urllist(applicationRoot, source, html, outfile);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-migratelurls"))) {
            File root = new File(applicationRoot);
            migratelurls(applicationRoot, new File(root, "DATA/PLASMADB/urlHash.db"));
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-urldbcleanup"))) {
            // generate a url list and save it in a file
            if (args.length == 2) applicationRoot= args[1];
            urldbcleanup(applicationRoot);
        } else if ((args.length >= 1) && (args[0].toLowerCase().equals("-rwihashlist"))) {
            // generate a url list and save it in a file
            String domain = "all";
            String format = "txt";
            if (args.length >= 2) domain= args[1];
            if (args.length >= 3) format= args[2];
            if (args.length == 4) applicationRoot= args[3];
            String outfile = "rwihashlist_" + System.currentTimeMillis();
            RWIHashList(applicationRoot, outfile, domain, format);
        } else {
            if (args.length == 1) applicationRoot= args[0];
            startup(applicationRoot, startupMemFree, startupMemTotal);
        }
    }
}

/**
* This class is a helper class whose instance is started, when the java virtual
* machine shuts down. Signals the plasmaSwitchboard to shut down.
*/
class shutdownHookThread extends Thread {
    private plasmaSwitchboard sb = null;
    private Thread mainThread = null;

    public shutdownHookThread(Thread mainThread, plasmaSwitchboard sb) {
        super();
        this.sb = sb;
        this.mainThread = mainThread;
    }

    public void run() {
        try {
            if (!this.sb.isTerminated()) {
                serverLog.logConfig("SHUTDOWN","Shutdown via shutdown hook.");

                // sending the yacy main thread a shutdown signal
                serverLog.logFine("SHUTDOWN","Signaling shutdown to the switchboard.");
                this.sb.terminate();

                // waiting for the yacy thread to finish execution
                serverLog.logFine("SHUTDOWN","Waiting for main thread to finish.");
                if (this.mainThread.isAlive() && !this.sb.isTerminated()) {
                    this.mainThread.join();
                }
            }
        } catch (Exception e) {
            serverLog.logSevere("SHUTDOWN","Unexpected error. " + e.getClass().getName(),e);
        }
    }
}