2006-03-18 22:47:04 +01:00
// yacySeed.java
2005-04-07 21:19:42 +02:00
// -------------------------------------
2008-07-20 19:14:51 +02:00
// (C) by Michael Peter Christen; mc@yacy.net
2005-04-07 21:19:42 +02:00
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
2005-09-21 23:32:43 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
2006-03-18 22:47:04 +01:00
//
// YACY stands for Yet Another CYberspace
//
// the yacySeed Object is the object that bundles and carries all information about
// a single peer in the yacy space.
// The yacySeed object is carried along peers using a string representation, that can
// be compressed and/or scrambled, depending on the purpose of the process.
//
// the yacy status
// any value that is defined here will be overwritten each time the proxy is started
// to prevent that the system gets confused, it should be set to "" which means
// undefined. Other status' that can be reached at run-time are
// junior - a peer that has no public socket, thus cannot be reached on demand
// senior - a peer that has a public socket and serves search queries
// principal - a peer like a senior that additionally serves as gateway for network definition
2005-04-07 21:19:42 +02:00
package de.anomic.yacy ;
2005-05-05 07:32:19 +02:00
import java.io.File ;
import java.io.FileReader ;
import java.io.FileWriter ;
import java.io.IOException ;
2006-12-20 02:07:49 +01:00
import java.net.InetAddress ;
2008-05-25 20:35:38 +02:00
import java.net.MalformedURLException ;
import java.net.URL ;
2005-05-05 07:32:19 +02:00
import java.util.Date ;
import java.util.HashMap ;
2007-04-26 11:51:51 +02:00
import java.util.Iterator ;
2008-10-10 10:39:11 +02:00
import java.util.Map ;
2007-10-01 14:30:23 +02:00
import java.util.Random ;
2006-10-10 22:09:26 +02:00
import java.util.Set ;
2008-03-11 00:28:05 +01:00
import java.util.TreeMap ;
2010-02-15 16:57:35 +01:00
import java.util.concurrent.ConcurrentHashMap ;
2010-09-07 19:13:47 +02:00
import java.util.regex.Pattern ;
2006-01-04 01:39:00 +01:00
2010-09-14 17:27:27 +02:00
import net.yacy.cora.protocol.Domains ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.word.Word ;
2010-04-15 15:22:59 +02:00
import net.yacy.kelondro.index.HandleSet ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Base64Order ;
import net.yacy.kelondro.order.Digest ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.DateFormatter ;
2009-10-11 02:24:42 +02:00
import net.yacy.kelondro.util.MapTools ;
2009-10-10 01:22:22 +02:00
2005-05-05 07:32:19 +02:00
import de.anomic.tools.bitfield ;
import de.anomic.tools.crypt ;
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after an index selection is made, the index is split into its vertical components
- from different index selections the split components can be accumulated before they are placed into the transmission queue
- each split chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertical partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
With two partitions, the fraction will be one half; with sixteen partitions, 1/16 of the previous number of URLs.
BE CAREFUL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
import de.anomic.yacy.dht.FlatWordPartitionScheme ;
2005-04-07 21:19:42 +02:00
2008-08-02 15:57:00 +02:00
public class yacySeed implements Cloneable {
2005-09-21 23:32:43 +02:00
2008-02-10 16:57:52 +01:00
public static final int maxsize = 4096 ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " sI " ( send index / words )
* /
2005-09-21 23:32:43 +02:00
public static final String INDEX_OUT = " sI " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " rI " ( received index / words )
* /
2005-09-21 23:32:43 +02:00
public static final String INDEX_IN = " rI " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " sU " ( send URLs )
* /
2005-09-21 23:32:43 +02:00
public static final String URL_OUT = " sU " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " rU " ( received URLs )
* /
2005-09-21 23:32:43 +02:00
public static final String URL_IN = " rU " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " virgin "
* /
2005-06-28 13:27:31 +02:00
public static final String PEERTYPE_VIRGIN = " virgin " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " junior "
* /
2005-06-28 13:27:31 +02:00
public static final String PEERTYPE_JUNIOR = " junior " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " senior "
* /
2005-06-28 13:27:31 +02:00
public static final String PEERTYPE_SENIOR = " senior " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " principal "
* /
2005-06-28 13:27:31 +02:00
public static final String PEERTYPE_PRINCIPAL = " principal " ;
2006-03-18 22:47:04 +01:00
/ * *
* < b > substance < / b > " PeerType "
* /
2005-06-28 13:27:31 +02:00
public static final String PEERTYPE = " PeerType " ;
2006-03-18 22:47:04 +01:00
2007-04-14 12:11:37 +02:00
/** static/dynamic (if the IP changes often for any reason) */
2010-04-19 18:42:37 +02:00
private static final String IPTYPE = " IPType " ;
private static final String FLAGS = " Flags " ;
private static final String FLAGSZERO = " ____ " ;
2007-04-14 12:11:37 +02:00
/** the applications version */
2010-04-19 18:42:37 +02:00
public static final String VERSION = " Version " ;
2006-03-18 22:47:04 +01:00
2010-04-19 18:42:37 +02:00
public static final String YOURTYPE = " yourtype " ;
public static final String LASTSEEN = " LastSeen " ;
private static final String USPEED = " USpeed " ;
2005-10-17 17:46:12 +02:00
2007-04-14 12:11:37 +02:00
/** the name of the peer (user-set) */
2010-04-19 18:42:37 +02:00
public static final String NAME = " Name " ;
private static final String HASH = " Hash " ;
2007-04-14 12:11:37 +02:00
/** Birthday - first startup */
2010-04-19 18:42:37 +02:00
private static final String BDATE = " BDate " ;
2007-04-14 12:11:37 +02:00
/** UTC-Offset */
2010-04-19 18:42:37 +02:00
public static final String UTC = " UTC " ;
private static final String PEERTAGS = " Tags " ;
2005-10-17 17:46:12 +02:00
2007-04-14 12:11:37 +02:00
/** the speed of indexing (pages/minute) of the peer */
2005-10-17 17:46:12 +02:00
public static final String ISPEED = " ISpeed " ;
2007-04-14 12:11:37 +02:00
/** the speed of retrieval (queries/minute) of the peer */
2007-01-31 16:39:11 +01:00
public static final String RSPEED = " RSpeed " ;
2007-04-14 12:11:37 +02:00
/** the number of minutes that the peer is up in minutes/day (moving average MA30) */
2005-10-17 17:46:12 +02:00
public static final String UPTIME = " Uptime " ;
2007-04-14 12:11:37 +02:00
/** the number of links that the peer has stored (LURL's) */
2005-10-17 17:46:12 +02:00
public static final String LCOUNT = " LCount " ;
2007-04-14 12:11:37 +02:00
/** the number of links that the peer has noticed, but not loaded (NURL's) */
2005-10-17 17:46:12 +02:00
public static final String NCOUNT = " NCount " ;
2007-08-22 02:59:05 +02:00
/** the number of links that the peer provides for remote crawls (ZURL's) */
public static final String RCOUNT = " RCount " ;
2008-04-19 06:24:29 +02:00
/** the number of different words the peer has indexed */
2005-10-17 17:46:12 +02:00
public static final String ICOUNT = " ICount " ;
2007-04-14 12:11:37 +02:00
/** the number of seeds that the peer has stored */
2005-10-17 17:46:12 +02:00
public static final String SCOUNT = " SCount " ;
2007-10-25 01:25:48 +02:00
/** the number of clients that the peer connects (connects/hour as double) */
2007-04-14 12:11:37 +02:00
public static final String CCOUNT = " CCount " ;
/** Citation Rank (Own) - Count */
public static final String CRWCNT = " CRWCnt " ;
/** Citation Rank (Other) - Count */
public static final String CRTCNT = " CRTCnt " ;
2005-10-17 17:46:12 +02:00
public static final String IP = " IP " ;
public static final String PORT = " Port " ;
2008-05-25 20:35:38 +02:00
public static final String SEEDLIST = " seedURL " ;
2007-04-14 12:11:37 +02:00
/** zero-value */
2010-04-19 18:42:37 +02:00
private static final String ZERO = " 0 " ;
2007-02-02 15:52:54 +01:00
2009-01-04 00:44:42 +01:00
private static final int FLAG_DIRECT_CONNECT = 0 ;
private static final int FLAG_ACCEPT_REMOTE_CRAWL = 1 ;
private static final int FLAG_ACCEPT_REMOTE_INDEX = 2 ;
private static final int FLAG_ACCEPT_CITATION_REFERENCE = 3 ;
2007-06-26 16:37:10 +02:00
public static final String DFLT_NETWORK_UNIT = " freeworld " ;
public static final String DFLT_NETWORK_GROUP = " " ;
2007-10-01 14:30:23 +02:00
private static final Random random = new Random ( System . currentTimeMillis ( ) ) ;
2007-06-26 16:37:10 +02:00
2005-04-07 21:19:42 +02:00
// class variables
2007-04-14 12:11:37 +02:00
/** the peer-hash */
2005-04-07 21:19:42 +02:00
public String hash ;
2007-04-14 12:11:37 +02:00
/** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ...*/
2010-02-15 16:57:35 +01:00
private final ConcurrentHashMap < String , String > dna ;
2010-04-19 18:42:37 +02:00
protected int selectscore = - 1 ; // only for debugging
private String alternativeIP = null ;
2005-04-07 21:19:42 +02:00
2010-02-15 16:57:35 +01:00
public yacySeed ( final String theHash , final ConcurrentHashMap < String , String > theDna ) {
2005-09-21 23:32:43 +02:00
// create a seed with a pre-defined hash map
2008-12-02 00:08:27 +01:00
assert theHash ! = null ;
2005-10-28 09:15:00 +02:00
this . hash = theHash ;
this . dna = theDna ;
2008-06-06 18:01:27 +02:00
final String flags = this . dna . get ( yacySeed . FLAGS ) ;
2006-10-12 00:23:48 +02:00
if ( ( flags = = null ) | | ( flags . length ( ) ! = 4 ) ) { this . dna . put ( yacySeed . FLAGS , yacySeed . FLAGSZERO ) ; }
2008-04-27 00:53:04 +02:00
this . dna . put ( yacySeed . NAME , checkPeerName ( get ( yacySeed . NAME , " ∅ " ) ) ) ;
2005-04-07 21:19:42 +02:00
}
2010-04-19 18:42:37 +02:00
private yacySeed ( final String theHash ) {
2010-02-15 16:57:35 +01:00
this . dna = new ConcurrentHashMap < String , String > ( ) ;
2005-09-21 23:32:43 +02:00
// settings that can only be computed by originating peer:
// at first startup -
2006-10-12 00:23:48 +02:00
this . hash = theHash ; // the hash key of the peer - very important. should be static somehow, even after restart
2007-04-14 12:11:37 +02:00
this . dna . put ( yacySeed . NAME , " ∅ " ) ;
this . dna . put ( yacySeed . BDATE , " ∅ " ) ;
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . UTC , " +0000 " ) ;
2005-09-21 23:32:43 +02:00
// later during operation -
2007-04-14 12:11:37 +02:00
this . dna . put ( yacySeed . ISPEED , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . RSPEED , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . UPTIME , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . LCOUNT , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . NCOUNT , yacySeed . ZERO ) ;
2007-08-22 02:59:05 +02:00
this . dna . put ( yacySeed . RCOUNT , yacySeed . ZERO ) ;
2007-04-14 12:11:37 +02:00
this . dna . put ( yacySeed . ICOUNT , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . SCOUNT , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . CCOUNT , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . VERSION , yacySeed . ZERO ) ;
2005-04-07 21:19:42 +02:00
2005-09-21 23:32:43 +02:00
// settings that is created during the 'hello' phase - in first contact
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . IP , " " ) ; // 123.234.345.456
this . dna . put ( yacySeed . PORT , " ∅ " ) ;
this . dna . put ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_VIRGIN ) ; // virgin/junior/senior/principal
2007-04-14 12:11:37 +02:00
this . dna . put ( yacySeed . IPTYPE , " ∅ " ) ;
2005-04-07 21:19:42 +02:00
2005-09-21 23:32:43 +02:00
// settings that can only be computed by visiting peer
2010-01-13 21:01:46 +01:00
this . dna . put ( yacySeed . LASTSEEN , DateFormatter . formatShortSecond ( new Date ( System . currentTimeMillis ( ) /*- DateFormatter.UTCDiff()*/ ) ) ) ; // for last-seen date
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . USPEED , yacySeed . ZERO ) ; // the computated uplink speed of the peer
2005-04-07 21:19:42 +02:00
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . CRWCNT , yacySeed . ZERO ) ;
this . dna . put ( yacySeed . CRTCNT , yacySeed . ZERO ) ;
2005-11-11 00:48:20 +01:00
2005-09-21 23:32:43 +02:00
// settings that are needed to organize the seed round-trip
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . FLAGS , yacySeed . FLAGSZERO ) ;
2005-04-07 21:19:42 +02:00
setFlagDirectConnect ( false ) ;
setFlagAcceptRemoteCrawl ( true ) ;
setFlagAcceptRemoteIndex ( true ) ;
2006-01-18 15:52:24 +01:00
setFlagAcceptCitationReference ( true ) ;
2005-11-11 00:48:20 +01:00
setUnusedFlags ( ) ;
2006-03-18 22:47:04 +01:00
2005-04-07 21:19:42 +02:00
// index transfer
2006-10-12 00:23:48 +02:00
this . dna . put ( yacySeed . INDEX_OUT , yacySeed . ZERO ) ; // send index
this . dna . put ( yacySeed . INDEX_IN , yacySeed . ZERO ) ; // received index
this . dna . put ( yacySeed . URL_OUT , yacySeed . ZERO ) ; // send URLs
this . dna . put ( yacySeed . URL_IN , yacySeed . ZERO ) ; // received URLs
2005-04-07 21:19:42 +02:00
}
2007-05-15 17:48:28 +02:00
2008-04-27 00:53:04 +02:00
/ * *
* check the peer name : protect against usage as XSS hack
* @param name
* @return a checked name without " < " and " > "
* /
2010-09-07 19:13:47 +02:00
final static Pattern ltp = Pattern . compile ( " < " ) ;
final static Pattern gtp = Pattern . compile ( " > " ) ;
2008-08-02 15:57:00 +02:00
private static String checkPeerName ( String name ) {
2010-09-07 19:13:47 +02:00
name = ltp . matcher ( name ) . replaceAll ( " _ " ) ;
name = gtp . matcher ( name ) . replaceAll ( " _ " ) ;
2008-04-27 00:53:04 +02:00
return name ;
}
2007-05-15 17:48:28 +02:00
/ * *
* Checks for the static fragments of a generated default peer name , such as the string ' dpn '
* @see # makeDefaultPeerName ( )
* @param name the peer name to check for default peer name compliance
* @return whether the given peer name may be a default generated peer name
* /
2008-08-02 14:12:04 +02:00
public static boolean isDefaultPeerName ( final String name ) {
2007-05-15 17:48:28 +02:00
return ( name ! = null & &
name . length ( ) > 10 & &
name . charAt ( 0 ) < = '9' & &
name . charAt ( name . length ( ) - 1 ) < = '9' & &
name . indexOf ( " dpn " ) > 0 ) ;
2006-03-02 23:25:46 +01:00
}
2007-04-30 00:05:34 +02:00
/ * *
* used when doing routing within a cluster ; this can assign a ip and a port
* that is used instead the address stored in the seed DNA
* /
2008-08-02 14:12:04 +02:00
public void setAlternativeAddress ( final String ipport ) {
2008-02-10 16:57:52 +01:00
if ( ipport = = null ) return ;
2008-08-02 14:12:04 +02:00
final int p = ipport . indexOf ( ':' ) ;
2008-02-10 16:57:52 +01:00
if ( p < 0 ) this . alternativeIP = ipport ; else this . alternativeIP = ipport . substring ( 0 , p ) ;
2007-04-30 00:05:34 +02:00
}
2006-10-12 00:23:48 +02:00
2005-11-03 15:16:16 +01:00
/**
 * Get the IP stored in this seed.
 * @return the stored IP, or <code>localhost</code> if no IP is stored or the stored value is empty
 */
2009-05-29 16:16:03 +02:00
public final String getIP ( ) {
String ip = get ( yacySeed . IP , " localhost " ) ;
return ( ip = = null | | ip . length ( ) = = 0 ) ? " localhost " : ip ;
}
2005-11-03 15:16:16 +01:00
/**
 * Get the peer type stored in this seed.
 * @return the peer type, or an empty string if none is stored
 */
2006-10-12 00:23:48 +02:00
public final String getPeerType ( ) { return get ( yacySeed . PEERTYPE , " " ) ; }
2006-01-23 00:14:37 +01:00
/ * *
* try to get the peertype < br >
2008-02-10 16:57:52 +01:00
* @return the peertype or " virgin "
2006-01-23 00:14:37 +01:00
* /
2006-10-12 00:23:48 +02:00
public final String orVirgin ( ) { return get ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_VIRGIN ) ; }
2005-11-03 15:16:16 +01:00
/ * *
* try to get the peertype < br >
* @return the peertype or " junior "
* /
2006-10-12 00:23:48 +02:00
public final String orJunior ( ) { return get ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_JUNIOR ) ; }
2005-11-03 15:16:16 +01:00
/ * *
* try to get the peertype < br >
* @return the peertype or " senior "
* /
2006-10-12 00:23:48 +02:00
public final String orSenior ( ) { return get ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_SENIOR ) ; }
2005-11-03 15:16:16 +01:00
/ * *
* try to get the peertype < br >
* @return the peertype or " principal "
* /
2006-10-12 00:23:48 +02:00
public final String orPrincipal ( ) { return get ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_PRINCIPAL ) ; }
2005-10-18 01:12:40 +02:00
2007-04-14 12:11:37 +02:00
/ * *
* Get a value from the peer ' s DNA ( its set of peer defining values , e . g . IP , name , version , . . . )
* @param key the key for the value to fetch
* @param dflt the default value
* /
2008-08-02 14:12:04 +02:00
public final String get ( final String key , final String dflt ) {
2005-10-28 09:15:00 +02:00
final Object o = this . dna . get ( key ) ;
if ( o = = null ) { return dflt ; }
return ( String ) o ;
2005-04-07 21:19:42 +02:00
}
2007-11-09 10:40:42 +01:00
2008-08-02 14:12:04 +02:00
public final long getLong ( final String key , final long dflt ) {
2007-11-09 10:40:42 +01:00
final Object o = this . dna . get ( key ) ;
if ( o = = null ) { return dflt ; }
2007-11-12 15:39:30 +01:00
if ( o instanceof String ) try {
2007-11-09 10:40:42 +01:00
return Long . parseLong ( ( String ) o ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NumberFormatException e ) {
2007-11-09 10:40:42 +01:00
return dflt ;
2007-11-12 15:39:30 +01:00
} else if ( o instanceof Long ) {
return ( ( Long ) o ) . longValue ( ) ;
} else if ( o instanceof Integer ) {
2008-06-06 18:01:27 +02:00
return ( ( Integer ) o ) . intValue ( ) ;
2007-11-12 15:39:30 +01:00
} else return dflt ;
2007-11-09 10:40:42 +01:00
}
2005-04-07 21:19:42 +02:00
2008-08-02 14:12:04 +02:00
public final void setIP ( final String ip ) { dna . put ( yacySeed . IP , ip ) ; }
public final void setPort ( final String port ) { dna . put ( yacySeed . PORT , port ) ; }
public final void setType ( final String type ) { dna . put ( yacySeed . PEERTYPE , type ) ; }
2008-02-10 16:57:52 +01:00
public final void setJunior ( ) { dna . put ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_JUNIOR ) ; }
public final void setSenior ( ) { dna . put ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_SENIOR ) ; }
public final void setPrincipal ( ) { dna . put ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_PRINCIPAL ) ; }
2008-08-02 14:12:04 +02:00
public final void put ( final String key , final String value ) {
2006-10-12 00:23:48 +02:00
synchronized ( this . dna ) {
2005-10-28 09:15:00 +02:00
this . dna . put ( key , value ) ;
}
2005-04-07 21:19:42 +02:00
}
2007-05-15 17:48:28 +02:00
/** @return the DNA-map of this peer */
2008-10-10 10:39:11 +02:00
public final Map < String , String > getMap ( ) {
2005-10-28 09:15:00 +02:00
return this . dna ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2006-10-12 00:23:48 +02:00
public final String getName ( ) {
2008-04-27 00:53:04 +02:00
return checkPeerName ( get ( yacySeed . NAME , " ∅ " ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2006-10-12 00:23:48 +02:00
public final String getHexHash ( ) {
2005-10-28 09:15:00 +02:00
return b64Hash2hexHash ( this . hash ) ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
public final void incSI ( final int count ) {
2008-06-06 18:01:27 +02:00
String v = this . dna . get ( yacySeed . INDEX_OUT ) ;
2006-10-12 00:23:48 +02:00
if ( v = = null ) { v = yacySeed . ZERO ; }
2009-01-05 19:28:27 +01:00
dna . put ( yacySeed . INDEX_OUT , Long . toString ( Long . parseLong ( v ) + ( long ) count ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2008-08-02 14:12:04 +02:00
public final void incRI ( final int count ) {
2008-06-06 18:01:27 +02:00
String v = this . dna . get ( yacySeed . INDEX_IN ) ;
2006-10-12 00:23:48 +02:00
if ( v = = null ) { v = yacySeed . ZERO ; }
2009-01-05 19:28:27 +01:00
dna . put ( yacySeed . INDEX_IN , Long . toString ( Long . parseLong ( v ) + ( long ) count ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2008-08-02 14:12:04 +02:00
public final void incSU ( final int count ) {
2008-06-06 18:01:27 +02:00
String v = this . dna . get ( yacySeed . URL_OUT ) ;
2006-10-12 00:23:48 +02:00
if ( v = = null ) { v = yacySeed . ZERO ; }
2009-01-05 19:28:27 +01:00
dna . put ( yacySeed . URL_OUT , Long . toString ( Long . parseLong ( v ) + ( long ) count ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2008-08-02 14:12:04 +02:00
public final void incRU ( final int count ) {
2008-06-06 18:01:27 +02:00
String v = this . dna . get ( yacySeed . URL_IN ) ;
2006-10-12 00:23:48 +02:00
if ( v = = null ) { v = yacySeed . ZERO ; }
2009-01-05 19:28:27 +01:00
dna . put ( yacySeed . URL_IN , Long . toString ( Long . parseLong ( v ) + ( long ) count ) ) ;
2005-04-07 21:19:42 +02:00
}
2009-05-28 17:49:42 +02:00
public final void resetCounters ( ) {
dna . put ( yacySeed . INDEX_OUT , yacySeed . ZERO ) ;
dna . put ( yacySeed . INDEX_IN , yacySeed . ZERO ) ;
dna . put ( yacySeed . URL_OUT , yacySeed . ZERO ) ;
dna . put ( yacySeed . URL_IN , yacySeed . ZERO ) ;
}
2005-04-07 21:19:42 +02:00
2007-05-15 17:48:28 +02:00
/ * *
* < code > 12 * 6 bit = 72 bit = 24 < / code > characters octal - hash
* < p > Octal hashes are used for cache - dumps that are DHT - ready < / p >
* < p >
* Cause : the natural order of octal hashes are the same as the b64 - order of b64Hashes .
* a hexhash cannot be used in such cases , and b64Hashes are not appropriate for file names
* < / p >
* @param b64Hash a base64 hash
* @return the octal representation of the given base64 hash
* /
2008-08-02 14:12:04 +02:00
public static String b64Hash2octalHash ( final String b64Hash ) {
2009-07-06 17:21:43 +02:00
return Digest . encodeOctal ( Base64Order . enhancedCoder . decode ( b64Hash ) ) ;
2007-02-06 17:01:03 +01:00
}
2007-05-15 17:48:28 +02:00
/ * *
* < code > 12 * 6 bit = 72 bit = 18 < / code > characters hex - hash
* @param b64Hash a base64 hash
* @return the hexadecimal representation of the given base64 hash
* /
2008-08-02 14:12:04 +02:00
public static String b64Hash2hexHash ( final String b64Hash ) {
2005-12-22 02:01:46 +01:00
// the hash string represents 12 * 6 bit = 72 bits. This is too much for a long integer.
2009-07-06 17:21:43 +02:00
return Digest . encodeHex ( Base64Order . enhancedCoder . decode ( b64Hash ) ) ;
2005-12-22 02:01:46 +01:00
}
2007-05-15 17:48:28 +02:00
/ * *
* @param hexHash a hexadecimal hash
* @return the base64 representation of the given hex hash
* /
2008-08-02 14:12:04 +02:00
public static String hexHash2b64Hash ( final String hexHash ) {
2009-01-30 16:33:00 +01:00
return Base64Order . enhancedCoder . encode ( Digest . decodeHex ( hexHash ) ) ;
2005-04-07 21:19:42 +02:00
}
2007-05-15 17:48:28 +02:00
/ * *
* The returned version follows this pattern : < code > MAJORVERSION . MINORVERSION 0 SVN REVISION < / code >
* @return the YaCy version of this peer as a float or < code > 0 < / code > if no valid value could be retrieved
* from this yacySeed object
* /
2006-10-12 00:23:48 +02:00
public final float getVersion ( ) {
2005-04-07 21:19:42 +02:00
try {
2006-10-12 00:23:48 +02:00
return Float . parseFloat ( get ( yacySeed . VERSION , yacySeed . ZERO ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NumberFormatException e ) {
2005-04-07 21:19:42 +02:00
return 0 ;
}
}
2007-05-15 17:48:28 +02:00
/ * *
* @return the public address of the peer as IP : port string or < code > null < / code > if no valid values for
* either the IP or the port could be retrieved from this yacySeed object
* /
2007-04-30 00:05:34 +02:00
public final String getPublicAddress ( ) {
2009-05-29 16:16:03 +02:00
String ip = this . getIP ( ) ;
if ( ip = = null | | ip . length ( ) < 8 ) ip = " localhost " ;
2006-11-24 17:23:08 +01:00
// if (ip.equals(yacyCore.seedDB.mySeed.dna.get(yacySeed.IP))) ip = "127.0.0.1";
// if (this.hash.equals("xxxxxxxxxxxx")) return "192.168.100.1:3300";
2008-06-06 18:01:27 +02:00
final String port = this . dna . get ( yacySeed . PORT ) ;
2007-05-07 22:48:24 +02:00
if ( ( port = = null ) | | ( port . length ( ) < 2 ) ) return null ;
2006-10-12 00:23:48 +02:00
return ip + " : " + port ;
2005-04-07 21:19:42 +02:00
}
2007-04-30 00:05:34 +02:00
2007-05-15 17:48:28 +02:00
/ * *
* If this seed is part of a cluster , the peer has probably the { @linkplain # alternativeIP } object set to
* a local IP . If this is present and the public IP of this peer is identical to the public IP of the own seed ,
* construct an address using this IP ; otherwise return the public address
* @see # getPublicAddress ( )
* @return the alternative IP : port if present , else the public address
* /
2007-04-30 00:05:34 +02:00
public final String getClusterAddress ( ) {
2007-05-07 22:48:24 +02:00
if ( this . alternativeIP = = null ) return getPublicAddress ( ) ;
2007-04-30 00:05:34 +02:00
2008-06-06 18:01:27 +02:00
final String port = this . dna . get ( yacySeed . PORT ) ;
2007-05-07 22:48:24 +02:00
if ( ( port = = null ) | | ( port . length ( ) < 2 ) ) return null ;
2007-04-30 00:05:34 +02:00
return this . alternativeIP + " : " + port ;
}
2007-05-15 17:48:28 +02:00
/ * *
* @return the IP address of the peer represented by this yacySeed object as { @link InetAddress }
* /
2006-12-20 02:07:49 +01:00
public final InetAddress getInetAddress ( ) {
2010-09-14 17:27:27 +02:00
return Domains . dnsResolve ( this . getIP ( ) ) ;
2006-12-20 02:07:49 +01:00
}
2007-05-15 17:48:28 +02:00
/** @return the portnumber of this seed or <code>-1</code> if not present */
2006-12-20 02:07:49 +01:00
public final int getPort ( ) {
2008-06-06 18:01:27 +02:00
final String port = this . dna . get ( yacySeed . PORT ) ;
2006-12-20 02:07:49 +01:00
if ( port = = null ) return - 1 ;
2007-04-14 12:11:37 +02:00
/*if (port.length() < 2) return -1; It is possible to use port 0-9*/
2006-12-20 02:07:49 +01:00
return Integer . parseInt ( port ) ;
}
2007-05-15 17:48:28 +02:00
/ * *
* To synchronize peer pings the local time differential must be included in calculations .
* @return the difference to UTC ( universal time coordinated ) in milliseconds of this yacySeed ,
* the difference to < code > + 0130 < / code > if not present or < code > 0 < / code > if an error occured during conversion
* /
2010-01-13 21:01:46 +01:00
/ *
2006-10-12 00:23:48 +02:00
public final long getUTCDiff ( ) {
2008-06-06 18:01:27 +02:00
String utc = this . dna . get ( yacySeed . UTC ) ;
2006-01-25 02:50:24 +01:00
if ( utc = = null ) { utc = " +0130 " ; }
2005-10-31 03:43:55 +01:00
try {
2009-01-30 16:33:00 +01:00
return DateFormatter . UTCDiff ( utc ) ;
2008-08-02 14:12:04 +02:00
} catch ( final IllegalArgumentException e ) {
2005-10-31 03:43:55 +01:00
return 0 ;
}
2005-09-27 18:28:55 +02:00
}
2010-01-13 21:01:46 +01:00
* /
2007-05-15 17:48:28 +02:00
/** puts the current time into the lastseen field and cares about the time differential to UTC */
2007-02-03 00:54:27 +01:00
public final void setLastSeenUTC ( ) {
// because java thinks it must apply the UTC offset to the current time,
// to create a string that looks like our current time, it adds the local UTC offset to the
// time. To create a corrected UTC Date string, we first subtract the local UTC offset.
2010-01-13 21:01:46 +01:00
String ls = DateFormatter . formatShortSecond ( new Date ( System . currentTimeMillis ( ) /*- DateFormatter.UTCDiff()*/ ) ) ;
//System.out.println("SETTING LAST-SEEN of " + this.getName() + " to " + ls);
dna . put ( yacySeed . LASTSEEN , ls ) ;
2007-02-03 00:54:27 +01:00
}
2007-05-15 17:48:28 +02:00
/ * *
* @return the last seen time converted to UTC in milliseconds
* /
2007-02-03 00:54:27 +01:00
public final long getLastSeenUTC ( ) {
2005-09-27 18:28:55 +02:00
try {
2009-01-30 16:33:00 +01:00
final long t = DateFormatter . parseShortSecond ( get ( yacySeed . LASTSEEN , " 20040101000000 " ) ) . getTime ( ) ;
2007-02-03 00:54:27 +01:00
// getTime creates a UTC time number. But in this case java thinks, that the given
// time string is a local time, which has a local UTC offset applied.
// Therefore java subtracts the local UTC offset, to get a UTC number.
// But the given time string is already in UTC time, so the subtraction
// of the local UTC offset is wrong. We correct this here by adding the local UTC
// offset again.
2010-01-13 21:01:46 +01:00
return t /*+ DateFormatter.UTCDiff()*/ ;
2008-08-08 15:56:29 +02:00
} catch ( final java . text . ParseException e ) { // in case of an error make seed look old!!!
2009-01-30 16:33:00 +01:00
return System . currentTimeMillis ( ) - DateFormatter . dayMillis ;
2008-08-02 14:12:04 +02:00
} catch ( final java . lang . NumberFormatException e ) {
2009-01-30 16:33:00 +01:00
return System . currentTimeMillis ( ) - DateFormatter . dayMillis ;
2005-09-27 18:28:55 +02:00
}
}
2007-02-02 14:12:31 +01:00
2007-05-15 17:48:28 +02:00
/ * *
* @see # getLastSeenUTC ( )
* @return the last seen value as string representation in the following format : YearMonthDayHoursMinutesSeconds
* or < code > 20040101000000 < / code > if not present
* /
2007-02-02 14:12:31 +01:00
public final String getLastSeenString ( ) {
return get ( yacySeed . LASTSEEN , " 20040101000000 " ) ;
}
2005-10-16 01:37:37 +02:00
2007-05-15 17:48:28 +02:00
/** @return the age of the seed in number of days */
2006-10-12 00:23:48 +02:00
public final int getAge ( ) {
2005-09-29 00:54:31 +02:00
try {
2009-01-30 16:33:00 +01:00
final long t = DateFormatter . parseShortSecond ( get ( yacySeed . BDATE , " 20040101000000 " ) ) . getTime ( ) ;
2010-01-13 21:01:46 +01:00
return ( int ) ( ( System . currentTimeMillis ( ) - ( t /*- getUTCDiff() + DateFormatter.UTCDiff()*/ ) ) / 1000 / 60 / 60 / 24 ) ;
2008-08-02 14:12:04 +02:00
} catch ( final java . text . ParseException e ) {
2005-09-29 00:54:31 +02:00
return - 1 ;
2008-08-02 14:12:04 +02:00
} catch ( final java . lang . NumberFormatException e ) {
2005-09-29 00:54:31 +02:00
return - 1 ;
}
}
2005-10-16 01:37:37 +02:00
2008-08-02 14:12:04 +02:00
public void setPeerTags ( final Set < String > keys ) {
2009-10-11 02:24:42 +02:00
dna . put ( PEERTAGS , MapTools . set2string ( keys , " | " , false ) ) ;
2006-10-10 22:09:26 +02:00
}
2006-10-12 00:23:48 +02:00
2008-01-11 15:13:08 +01:00
public Set < String > getPeerTags ( ) {
2009-10-11 02:24:42 +02:00
return MapTools . string2set ( get ( PEERTAGS , " * " ) , " | " ) ;
2006-10-10 22:09:26 +02:00
}
2006-10-12 00:23:48 +02:00
2010-04-15 15:22:59 +02:00
public boolean matchPeerTags ( final HandleSet searchHashes ) {
2008-08-02 14:12:04 +02:00
final String peertags = get ( PEERTAGS , " " ) ;
2008-07-01 01:04:32 +02:00
if ( peertags . equals ( " * " ) ) return true ;
2009-10-11 02:24:42 +02:00
final Set < String > tags = MapTools . string2set ( peertags , " | " ) ;
2008-08-02 14:12:04 +02:00
final Iterator < String > i = tags . iterator ( ) ;
2007-04-26 11:51:51 +02:00
while ( i . hasNext ( ) ) {
2010-04-15 15:22:59 +02:00
if ( searchHashes . has ( Word . word2hash ( i . next ( ) ) ) ) return true ;
2007-04-26 11:51:51 +02:00
}
return false ;
}
2005-07-19 02:26:31 +02:00
public int getPPM ( ) {
try {
2006-10-12 00:23:48 +02:00
return Integer . parseInt ( get ( yacySeed . ISPEED , yacySeed . ZERO ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NumberFormatException e ) {
2005-07-19 02:26:31 +02:00
return 0 ;
}
}
2005-09-21 23:32:43 +02:00
2007-02-01 01:17:35 +01:00
public double getQPM ( ) {
2007-01-31 16:39:11 +01:00
try {
2007-02-01 01:17:35 +01:00
return Double . parseDouble ( get ( yacySeed . RSPEED , yacySeed . ZERO ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NumberFormatException e ) {
2007-02-01 01:17:35 +01:00
return 0d ;
2007-01-31 16:39:11 +01:00
}
}
2006-10-12 00:23:48 +02:00
public final long getLinkCount ( ) {
2005-10-08 02:37:43 +02:00
try {
2007-11-09 10:40:42 +01:00
return getLong ( yacySeed . LCOUNT , 0 ) ;
2008-08-02 14:12:04 +02:00
} catch ( final NumberFormatException e ) {
2005-10-08 02:37:43 +02:00
return 0 ;
}
}
2005-10-16 01:37:37 +02:00
2009-12-21 12:36:48 +01:00
/**
 * Reads the ICOUNT entry of this seed.
 *
 * @return the value delivered by getLong, or 0 if a NumberFormatException occurs
 */
public final long getWordCount() {
    long words;
    try {
        words = getLong(yacySeed.ICOUNT, 0);
    } catch (final NumberFormatException e) {
        words = 0;
    }
    return words;
}
2008-08-02 14:12:04 +02:00
private boolean getFlag ( final int flag ) {
2006-10-12 00:23:48 +02:00
final String flags = get ( yacySeed . FLAGS , yacySeed . FLAGSZERO ) ;
2005-04-07 21:19:42 +02:00
return ( new bitfield ( flags . getBytes ( ) ) ) . get ( flag ) ;
}
2005-09-21 23:32:43 +02:00
2008-08-02 14:12:04 +02:00
private void setFlag ( final int flag , final boolean value ) {
2006-10-12 00:23:48 +02:00
String flags = get ( yacySeed . FLAGS , yacySeed . FLAGSZERO ) ;
if ( flags . length ( ) ! = 4 ) { flags = yacySeed . FLAGSZERO ; }
2005-10-16 01:37:37 +02:00
final bitfield f = new bitfield ( flags . getBytes ( ) ) ;
2005-04-07 21:19:42 +02:00
f . set ( flag , value ) ;
2007-02-03 00:54:27 +01:00
dna . put ( yacySeed . FLAGS , new String ( f . getBytes ( ) ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2009-01-04 00:44:42 +01:00
/** Sets the flag bit FLAG_DIRECT_CONNECT to the given value. */
public final void setFlagDirectConnect(final boolean value) {
    setFlag(FLAG_DIRECT_CONNECT, value);
}
/** Sets the flag bit FLAG_ACCEPT_REMOTE_CRAWL to the given value. */
public final void setFlagAcceptRemoteCrawl(final boolean value) {
    setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value);
}
/** Sets the flag bit FLAG_ACCEPT_REMOTE_INDEX to the given value. */
public final void setFlagAcceptRemoteIndex(final boolean value) {
    setFlag(FLAG_ACCEPT_REMOTE_INDEX, value);
}
/** Sets the flag bit FLAG_ACCEPT_CITATION_REFERENCE to the given value. */
public final void setFlagAcceptCitationReference(final boolean value) {
    setFlag(FLAG_ACCEPT_CITATION_REFERENCE, value);
}
2006-10-12 00:23:48 +02:00
/** @return the state of flag bit 0 of this seed */
public final boolean getFlagDirectConnect() {
    final boolean directConnect = getFlag(0);
    return directConnect;
}
public final boolean getFlagAcceptRemoteCrawl ( ) {
2005-04-07 21:19:42 +02:00
//if (getVersion() < 0.300) return false;
//if (getVersion() < 0.334) return true;
return getFlag ( 1 ) ;
}
2006-10-12 00:23:48 +02:00
public final boolean getFlagAcceptRemoteIndex ( ) {
2005-04-07 21:19:42 +02:00
//if (getVersion() < 0.335) return false;
return getFlag ( 2 ) ;
}
2006-10-12 00:23:48 +02:00
public final boolean getFlagAcceptCitationReference ( ) {
2006-01-18 15:52:24 +01:00
return getFlag ( 3 ) ;
}
2006-10-12 00:23:48 +02:00
public final void setUnusedFlags ( ) {
for ( int i = 4 ; i < 24 ; i + + ) { setFlag ( i , true ) ; }
2005-11-11 00:48:20 +01:00
}
2008-08-02 14:12:04 +02:00
public final boolean isType ( final String type ) {
2008-02-10 16:57:52 +01:00
return get ( yacySeed . PEERTYPE , " " ) . equals ( type ) ;
2006-09-14 11:28:17 +02:00
}
2006-10-12 00:23:48 +02:00
public final boolean isVirgin ( ) {
return get ( yacySeed . PEERTYPE , " " ) . equals ( yacySeed . PEERTYPE_VIRGIN ) ;
2005-04-07 21:19:42 +02:00
}
2006-10-12 00:23:48 +02:00
public final boolean isJunior ( ) {
return get ( yacySeed . PEERTYPE , " " ) . equals ( yacySeed . PEERTYPE_JUNIOR ) ;
2005-04-07 21:19:42 +02:00
}
2006-10-12 00:23:48 +02:00
public final boolean isSenior ( ) {
return get ( yacySeed . PEERTYPE , " " ) . equals ( yacySeed . PEERTYPE_SENIOR ) ;
2005-04-07 21:19:42 +02:00
}
2006-10-12 00:23:48 +02:00
public final boolean isPrincipal ( ) {
return get ( yacySeed . PEERTYPE , " " ) . equals ( yacySeed . PEERTYPE_PRINCIPAL ) ;
2005-04-07 21:19:42 +02:00
}
2008-02-10 16:57:52 +01:00
public final boolean isPotential ( ) {
return isVirgin ( ) | | isJunior ( ) ;
}
public final boolean isActive ( ) {
return isSenior ( ) | | isPrincipal ( ) ;
}
2006-10-12 00:23:48 +02:00
public final boolean isOnline ( ) {
return isSenior ( ) | | isPrincipal ( ) ;
2005-04-07 21:19:42 +02:00
}
2006-10-12 00:23:48 +02:00
public final boolean isOnline ( final String type ) {
return type . equals ( yacySeed . PEERTYPE_SENIOR ) | | type . equals ( yacySeed . PEERTYPE_PRINCIPAL ) ;
2005-10-18 01:12:40 +02:00
}
2008-02-10 16:57:52 +01:00
2009-04-16 17:29:00 +02:00
private static byte [ ] bestGap ( final yacySeedDB seedDB ) {
2008-03-11 00:28:05 +01:00
if ( ( seedDB = = null ) | | ( seedDB . sizeConnected ( ) < = 2 ) ) {
// use random hash
return randomHash ( ) ;
}
// find gaps
2008-11-03 01:27:23 +01:00
final TreeMap < Long , String > gaps = hashGaps ( seedDB ) ;
2008-03-11 00:28:05 +01:00
// take one gap; prefer biggest but take also another smaller by chance
String interval = null ;
2008-08-02 14:12:04 +02:00
final Random r = new Random ( ) ;
2009-12-02 01:37:59 +01:00
while ( ! gaps . isEmpty ( ) ) {
2008-03-11 00:28:05 +01:00
interval = gaps . remove ( gaps . lastKey ( ) ) ;
if ( r . nextBoolean ( ) ) break ;
}
if ( interval = = null ) return randomHash ( ) ;
// find dht position and size of gap
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
final long gaphalf = FlatWordPartitionScheme . dhtDistance (
2009-04-16 17:29:00 +02:00
FlatWordPartitionScheme . std . dhtPosition ( interval . substring ( 0 , 12 ) . getBytes ( ) , null ) ,
FlatWordPartitionScheme . std . dhtPosition ( interval . substring ( 12 ) . getBytes ( ) , null ) ) > > 1 ;
long p = FlatWordPartitionScheme . std . dhtPosition ( interval . substring ( 0 , 12 ) . getBytes ( ) , null ) ;
2008-11-03 01:27:23 +01:00
long gappos = ( Long . MAX_VALUE - p > = gaphalf ) ? p + gaphalf : ( p - Long . MAX_VALUE ) + gaphalf ;
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
return FlatWordPartitionScheme . positionToHash ( gappos ) ;
2008-03-11 00:28:05 +01:00
}
2008-11-03 01:27:23 +01:00
private static TreeMap < Long , String > hashGaps ( final yacySeedDB seedDB ) {
final TreeMap < Long , String > gaps = new TreeMap < Long , String > ( ) ;
2008-03-11 00:28:05 +01:00
if ( seedDB = = null ) return gaps ;
2008-08-02 14:12:04 +02:00
final Iterator < yacySeed > i = seedDB . seedsConnected ( true , false , null , ( float ) 0 . 0 ) ;
2008-11-03 01:27:23 +01:00
long l ;
2008-03-11 00:28:05 +01:00
yacySeed s0 = null , s1 , first = null ;
while ( i . hasNext ( ) ) {
s1 = i . next ( ) ;
if ( s0 = = null ) {
s0 = s1 ;
first = s0 ;
continue ;
}
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
l = FlatWordPartitionScheme . dhtDistance (
2009-04-16 17:29:00 +02:00
FlatWordPartitionScheme . std . dhtPosition ( s0 . hash . getBytes ( ) , null ) ,
FlatWordPartitionScheme . std . dhtPosition ( s1 . hash . getBytes ( ) , null ) ) ;
2008-11-03 01:27:23 +01:00
gaps . put ( l , s0 . hash + s1 . hash ) ;
2008-03-11 00:28:05 +01:00
s0 = s1 ;
}
// compute also the last gap
if ( ( first ! = null ) & & ( s0 ! = null ) ) {
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
l = FlatWordPartitionScheme . dhtDistance (
2009-04-16 17:29:00 +02:00
FlatWordPartitionScheme . std . dhtPosition ( s0 . hash . getBytes ( ) , null ) ,
FlatWordPartitionScheme . std . dhtPosition ( first . hash . getBytes ( ) , null ) ) ;
2008-11-03 01:27:23 +01:00
gaps . put ( l , s0 . hash + first . hash ) ;
2008-03-11 00:28:05 +01:00
}
return gaps ;
}
2007-10-01 14:30:23 +02:00
2008-08-02 14:12:04 +02:00
public static yacySeed genLocalSeed ( final yacySeedDB db ) {
2008-05-06 01:13:47 +02:00
return genLocalSeed ( db , 0 , null ) ; // an anonymous peer
}
2008-08-02 14:12:04 +02:00
public static yacySeed genLocalSeed ( final yacySeedDB db , final int port , final String name ) {
2008-05-06 01:13:47 +02:00
// generate a seed for the local peer
2006-10-12 00:23:48 +02:00
// this is the birthplace of a seed, that then will start to travel to other peers
2009-05-09 21:07:34 +02:00
final String hashs = new String ( bestGap ( db ) ) ;
yacyCore . log . logInfo ( " init: OWN SEED = " + hashs ) ;
2006-10-12 00:23:48 +02:00
2009-05-09 21:07:34 +02:00
final yacySeed newSeed = new yacySeed ( hashs ) ;
2006-10-12 00:23:48 +02:00
// now calculate other information about the host
2008-05-06 01:13:47 +02:00
newSeed . dna . put ( yacySeed . NAME , ( name ) = = null ? " anonymous " : name ) ;
newSeed . dna . put ( yacySeed . PORT , Integer . toString ( ( port < = 0 ) ? 8080 : port ) ) ;
2010-01-13 21:01:46 +01:00
newSeed . dna . put ( yacySeed . BDATE , DateFormatter . formatShortSecond ( new Date ( System . currentTimeMillis ( ) /*- DateFormatter.UTCDiff()*/ ) ) ) ;
2006-10-12 00:23:48 +02:00
newSeed . dna . put ( yacySeed . LASTSEEN , newSeed . dna . get ( yacySeed . BDATE ) ) ; // just as initial setting
2009-01-30 16:33:00 +01:00
newSeed . dna . put ( yacySeed . UTC , DateFormatter . UTCDiffString ( ) ) ;
2006-10-12 00:23:48 +02:00
newSeed . dna . put ( yacySeed . PEERTYPE , yacySeed . PEERTYPE_VIRGIN ) ;
return newSeed ;
2005-04-07 21:19:42 +02:00
}
2005-11-14 01:23:20 +01:00
//public static String randomHash() { return "zLXFf5lTteUv"; } // only for debugging
2006-10-12 00:23:48 +02:00
2009-04-16 17:29:00 +02:00
public static byte [ ] randomHash ( ) {
2006-10-12 00:23:48 +02:00
final String hash =
2009-01-30 16:33:00 +01:00
Base64Order . enhancedCoder . encode ( Digest . encodeMD5Raw ( Long . toString ( random . nextLong ( ) ) ) ) . substring ( 0 , 6 ) +
Base64Order . enhancedCoder . encode ( Digest . encodeMD5Raw ( Long . toString ( random . nextLong ( ) ) ) ) . substring ( 0 , 6 ) ;
2009-04-16 17:29:00 +02:00
return hash . getBytes ( ) ;
2005-11-11 00:48:20 +01:00
}
2006-10-12 00:23:48 +02:00
2008-08-02 14:12:04 +02:00
public static yacySeed genRemoteSeed ( final String seedStr , final String key , final boolean ownSeed ) {
2005-09-27 18:28:55 +02:00
// this method is used to convert the external representation of a seed into a seed object
2008-05-25 20:35:38 +02:00
// yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key);
// check protocol and syntax of seed
2010-09-14 15:35:47 +02:00
if ( seedStr = = null | | seedStr . length ( ) = = 0 ) return null ;
2005-10-16 01:37:37 +02:00
final String seed = crypt . simpleDecode ( seedStr , key ) ;
2010-09-14 15:35:47 +02:00
if ( seed = = null | | seed . length ( ) = = 0 ) return null ;
2008-05-25 20:35:38 +02:00
// extract hash
2010-02-15 16:57:35 +01:00
final ConcurrentHashMap < String , String > dna = MapTools . string2map ( seed , " , " ) ;
2008-05-25 20:35:38 +02:00
final String hash = dna . remove ( yacySeed . HASH ) ;
2008-12-02 00:08:27 +01:00
if ( hash = = null ) return null ;
2006-10-12 00:23:48 +02:00
final yacySeed resultSeed = new yacySeed ( hash , dna ) ;
2008-05-25 20:35:38 +02:00
// check semantics of content
2008-06-05 00:24:00 +02:00
final String testResult = resultSeed . isProper ( ownSeed ) ;
2008-05-25 20:35:38 +02:00
if ( testResult ! = null ) {
2008-09-03 02:30:21 +02:00
if ( yacyCore . log . isFinest ( ) ) yacyCore . log . logFinest ( " seed is not proper ( " + testResult + " ): " + resultSeed ) ;
2008-05-25 20:35:38 +02:00
return null ;
2006-04-06 18:28:28 +02:00
}
2008-05-25 20:35:38 +02:00
// seed ok
2006-04-06 18:28:28 +02:00
return resultSeed ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
public final String isProper ( final boolean checkOwnIP ) {
2008-05-25 20:35:38 +02:00
// checks if everything is ok with that seed
// check hash
if ( this . hash = = null ) return " hash is null " ;
2009-10-11 02:12:19 +02:00
if ( this . hash . length ( ) ! = Word . commonHashLength ) return " wrong hash length ( " + this . hash . length ( ) + " ) " ;
2008-05-25 20:35:38 +02:00
// name
2008-08-02 14:12:04 +02:00
final String peerName = this . dna . get ( yacySeed . NAME ) ;
2008-05-25 20:35:38 +02:00
if ( peerName = = null ) return " no peer name given " ;
dna . put ( yacySeed . NAME , checkPeerName ( peerName ) ) ;
2008-08-08 15:56:29 +02:00
// type
final String peerType = this . getPeerType ( ) ;
if ( ( peerType = = null ) | |
! ( peerType . equals ( yacySeed . PEERTYPE_VIRGIN ) | | peerType . equals ( yacySeed . PEERTYPE_JUNIOR )
| | peerType . equals ( yacySeed . PEERTYPE_SENIOR ) | | peerType . equals ( yacySeed . PEERTYPE_PRINCIPAL ) ) )
return " invalid peerType ' " + peerType + " ' " ;
2008-05-25 20:35:38 +02:00
// check IP
2008-06-05 00:24:00 +02:00
if ( ! checkOwnIP ) {
// checking of IP is omitted if we read the own seed file
2009-05-29 16:16:03 +02:00
final String ipCheck = isProperIP ( this . getIP ( ) ) ;
2008-06-05 13:01:20 +02:00
if ( ipCheck ! = null ) return ipCheck ;
2008-06-05 00:24:00 +02:00
}
2008-05-25 20:41:56 +02:00
2008-05-25 20:35:38 +02:00
// seedURL
final String seedURL = this . dna . get ( SEEDLIST ) ;
if ( seedURL ! = null & & seedURL . length ( ) > 0 ) {
2008-05-25 22:30:37 +02:00
if ( ! seedURL . startsWith ( " http:// " ) & & ! seedURL . startsWith ( " https:// " ) ) return " wrong protocol for seedURL " ;
2008-05-25 20:35:38 +02:00
try {
2008-08-02 14:12:04 +02:00
final URL url = new URL ( seedURL ) ;
final String host = url . getHost ( ) ;
2008-05-25 22:30:37 +02:00
if ( host . equals ( " localhost " ) | | host . startsWith ( " 127. " ) | | ( host . startsWith ( " 0:0:0:0:0:0:0:1 " ) ) ) return " seedURL in localhost rejected " ;
2008-08-02 14:12:04 +02:00
} catch ( final MalformedURLException e ) {
2008-05-25 20:35:38 +02:00
return " seedURL malformed " ;
}
}
return null ;
}
2008-05-25 20:41:56 +02:00
2008-08-02 14:12:04 +02:00
public static final String isProperIP ( final String ipString ) {
2010-04-15 15:22:59 +02:00
// returns null if ipString is proper, a string with the cause otherwise
2008-06-05 13:01:20 +02:00
if ( ipString = = null ) return " IP is null " ;
if ( ipString . length ( ) > 0 & & ipString . length ( ) < 8 ) return " IP is too short: " + ipString ;
2010-09-14 17:27:27 +02:00
InetAddress ip = Domains . dnsResolve ( ipString ) ;
if ( ip = = null ) return " IP is not proper: " + ipString ; //this does not work with staticIP
2008-06-05 13:01:20 +02:00
if ( ipString . equals ( " localhost " ) | | ipString . startsWith ( " 127. " ) | | ( ipString . startsWith ( " 0:0:0:0:0:0:0:1 " ) ) ) return " IP for localhost rejected " ;
return null ;
}
2006-10-12 00:23:48 +02:00
public final String toString ( ) {
2009-09-28 17:23:15 +02:00
HashMap < String , String > copymap = new HashMap < String , String > ( ) ;
copymap . putAll ( this . dna ) ;
copymap . put ( yacySeed . HASH , this . hash ) ; // set hash into seed code structure
2009-10-11 02:24:42 +02:00
return MapTools . map2string ( copymap , " , " , true ) ; // generate string representation
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
public final String genSeedStr ( final String key ) {
2005-10-16 01:37:37 +02:00
// use a default encoding
2008-08-02 14:12:04 +02:00
final String z = this . genSeedStr ( 'z' , key ) ;
final String b = this . genSeedStr ( 'b' , key ) ;
2008-02-17 13:36:43 +01:00
// the compressed string may be longer that the uncompressed if there is too much overhead for compression meta-info
// take simply that string that is shorter
if ( b . length ( ) < z . length ( ) ) return b ; else return z ;
2005-04-07 21:19:42 +02:00
}
2009-09-28 17:23:15 +02:00
public final String genSeedStr ( final char method , final String key ) {
2006-10-12 00:23:48 +02:00
return crypt . simpleEncode ( this . toString ( ) , key , method ) ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
public final void save ( final File f ) throws IOException {
2008-05-14 22:30:44 +02:00
final String out = this . genSeedStr ( 'p' , null ) ;
2005-10-16 01:37:37 +02:00
final FileWriter fw = new FileWriter ( f ) ;
fw . write ( out , 0 , out . length ( ) ) ;
fw . close ( ) ;
2005-04-07 21:19:42 +02:00
}
2008-08-02 14:12:04 +02:00
public static yacySeed load ( final File f ) throws IOException {
2005-10-16 01:37:37 +02:00
final FileReader fr = new FileReader ( f ) ;
final char [ ] b = new char [ ( int ) f . length ( ) ] ;
fr . read ( b , 0 , b . length ) ;
fr . close ( ) ;
2008-08-02 14:12:04 +02:00
final yacySeed mySeed = genRemoteSeed ( new String ( b ) , null , true ) ;
2008-06-05 00:24:00 +02:00
if ( mySeed = = null ) return null ;
mySeed . dna . put ( yacySeed . IP , " " ) ; // set own IP as unknown
return mySeed ;
2005-04-07 21:19:42 +02:00
}
2008-03-10 00:48:24 +01:00
public final yacySeed clone ( ) {
2010-02-15 16:57:35 +01:00
ConcurrentHashMap < String , String > ndna = new ConcurrentHashMap < String , String > ( ) ;
ndna . putAll ( this . dna ) ;
return new yacySeed ( this . hash , ndna ) ;
2005-04-07 21:19:42 +02:00
}
2008-11-06 11:07:53 +01:00
2005-10-17 17:46:12 +02:00
}