2005-04-07 21:19:42 +02:00
// yacyDHTAction.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
2006-02-21 01:30:38 +01:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.yacy ;
2006-12-14 03:48:37 +01:00
import java.util.ArrayList ;
2005-05-05 07:32:19 +02:00
import java.util.Enumeration ;
2007-03-22 10:08:38 +01:00
import java.util.Hashtable ;
2007-04-26 11:51:51 +02:00
import java.util.TreeSet ;
2005-05-05 07:32:19 +02:00
2006-12-14 03:48:37 +01:00
import de.anomic.kelondro.kelondroBase64Order ;
2007-04-26 11:51:51 +02:00
import de.anomic.kelondro.kelondroCloneableIterator ;
import de.anomic.kelondro.kelondroCloneableSetIterator ;
2005-05-05 07:32:19 +02:00
import de.anomic.kelondro.kelondroException ;
import de.anomic.kelondro.kelondroMScoreCluster ;
2007-04-26 11:51:51 +02:00
import de.anomic.kelondro.kelondroRotateIterator ;
2006-02-20 00:47:45 +01:00
import de.anomic.server.logging.serverLog ;
2005-04-07 21:19:42 +02:00
public class yacyDHTAction implements yacyPeerAction {
2006-08-16 21:49:31 +02:00
// seed database handed to the constructor; queried for the connected peers
protected yacySeedDB seedDB ;
// score table keyed by peer hash; the constructor, setCrawlTime and setCrawlDelay
// store yacyTime-based values in it, the crawl seed enumeration compares them to the current yacyTime
protected kelondroMScoreCluster seedCrawlReady ;
2005-04-07 21:19:42 +02:00
public yacyDHTAction ( yacySeedDB seedDB ) {
this . seedDB = seedDB ;
this . seedCrawlReady = new kelondroMScoreCluster ( ) ;
// init crawl-ready table
try {
2006-11-20 03:46:53 +01:00
Enumeration en = seedDB . seedsConnected ( true , false , null , ( float ) 0 . 0 ) ;
2005-04-07 21:19:42 +02:00
yacySeed ys ;
while ( en . hasMoreElements ( ) ) {
ys = ( yacySeed ) en . nextElement ( ) ;
if ( ( ys ! = null ) & & ( ys . getVersion ( ) > = ( ( float ) 0 . 3 ) ) ) seedCrawlReady . setScore ( ys . hash , yacyCore . yacyTime ( ) ) ;
}
} catch ( IllegalArgumentException e ) {
}
}
2006-11-20 03:46:53 +01:00
public Enumeration getDHTSeeds ( boolean up , String firstHash , float minVersion ) {
2005-04-07 21:19:42 +02:00
// enumerates seed-type objects: all seeds with starting point in the middle, rotating at the end/beginning
2006-11-20 03:46:53 +01:00
return new seedDHTEnum ( up , firstHash , minVersion ) ;
2005-04-07 21:19:42 +02:00
}
class seedDHTEnum implements Enumeration {
2006-11-20 03:46:53 +01:00
Enumeration e1 , e2 ;
2005-04-07 21:19:42 +02:00
boolean up ;
int steps ;
2006-11-20 03:46:53 +01:00
float minVersion ;
2005-04-07 21:19:42 +02:00
2006-11-20 03:46:53 +01:00
public seedDHTEnum ( boolean up , String firstHash , float minVersion ) {
2005-04-07 21:19:42 +02:00
this . steps = seedDB . sizeConnected ( ) ;
this . up = up ;
2006-11-20 03:46:53 +01:00
this . minVersion = minVersion ;
this . e1 = seedDB . seedsConnected ( up , false , firstHash , minVersion ) ;
2005-04-07 21:19:42 +02:00
this . e2 = null ;
2006-11-20 03:46:53 +01:00
}
2005-04-07 21:19:42 +02:00
public boolean hasMoreElements ( ) {
2006-11-20 03:46:53 +01:00
return ( steps > 0 ) & & ( ( e2 = = null ) | | ( e2 . hasMoreElements ( ) ) ) ;
}
2005-04-07 21:19:42 +02:00
public Object nextElement ( ) {
if ( steps = = 0 ) return null ;
steps - - ;
if ( ( e1 ! = null ) & & ( e1 . hasMoreElements ( ) ) ) {
Object n = e1 . nextElement ( ) ;
if ( ! ( e1 . hasMoreElements ( ) ) ) {
e1 = null ;
2006-11-20 03:46:53 +01:00
e2 = seedDB . seedsConnected ( up , false , null , minVersion ) ;
2005-04-07 21:19:42 +02:00
}
return n ;
} else {
if ( e2 = = null ) {
e1 = null ;
2006-11-20 03:46:53 +01:00
e2 = seedDB . seedsConnected ( up , false , null , minVersion ) ;
2005-04-07 21:19:42 +02:00
}
return e2 . nextElement ( ) ;
}
}
}
// Returns an enumeration of yacySeed objects that have the
// AcceptRemoteIndex flag set. The seeds are enumerated in DHT order,
// beginning at starthash.
public Enumeration getAcceptRemoteIndexSeeds(String starthash) {
    final Enumeration accepting = new acceptRemoteIndexSeedEnum(starthash);
    return accepting;
}
class acceptRemoteIndexSeedEnum implements Enumeration {
2006-11-20 03:46:53 +01:00
Enumeration se ;
2005-04-07 21:19:42 +02:00
yacySeed nextSeed ;
2006-11-20 03:46:53 +01:00
public acceptRemoteIndexSeedEnum ( String starthash ) {
se = getDHTSeeds ( true , starthash , yacyVersion . YACY_HANDLES_COLLECTION_INDEX ) ;
2005-04-07 21:19:42 +02:00
nextSeed = nextInternal ( ) ;
2006-11-20 03:46:53 +01:00
}
2005-04-07 21:19:42 +02:00
public boolean hasMoreElements ( ) {
2006-11-20 03:46:53 +01:00
return nextSeed ! = null ;
}
2005-04-07 21:19:42 +02:00
private yacySeed nextInternal ( ) {
yacySeed s ;
2005-04-15 16:18:14 +02:00
try {
while ( se . hasMoreElements ( ) ) {
s = ( yacySeed ) se . nextElement ( ) ;
if ( s = = null ) return null ;
if ( s . getFlagAcceptRemoteIndex ( ) ) return s ;
}
} catch ( kelondroException e ) {
2007-01-30 00:51:10 +01:00
System . out . println ( " DEBUG acceptRemoteIndexSeedEnum: " + e . getMessage ( ) ) ;
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " database inconsistency ( " + e . getMessage ( ) + " ), re-set of db. " ) ;
2005-04-15 16:18:14 +02:00
seedDB . resetActiveTable ( ) ;
return null ;
2005-04-07 21:19:42 +02:00
}
return null ;
}
2006-11-20 03:46:53 +01:00
public Object nextElement ( ) {
2005-04-07 21:19:42 +02:00
yacySeed next = nextSeed ;
nextSeed = nextInternal ( ) ;
return next ;
2006-11-20 03:46:53 +01:00
}
2005-04-07 21:19:42 +02:00
}
// Returns an enumeration of yacySeed objects starting at starthash, filtered
// by their crawl-ready time: with available == true only seeds whose
// crawl-ready score lies before the current yacyTime are delivered,
// otherwise only those whose score lies after it.
public Enumeration getAcceptRemoteCrawlSeeds(String starthash, boolean available) {
    final Enumeration crawlSeeds = new acceptRemoteCrawlSeedEnum(starthash, available);
    return crawlSeeds;
}
class acceptRemoteCrawlSeedEnum implements Enumeration {
2006-11-20 03:46:53 +01:00
Enumeration se ;
2005-04-07 21:19:42 +02:00
yacySeed nextSeed ;
boolean available ;
2006-11-20 03:46:53 +01:00
public acceptRemoteCrawlSeedEnum ( String starthash , boolean available ) {
this . se = getDHTSeeds ( true , starthash , ( float ) 0 . 0 ) ;
2005-04-07 21:19:42 +02:00
this . available = available ;
nextSeed = nextInternal ( ) ;
2006-11-20 03:46:53 +01:00
}
2005-04-07 21:19:42 +02:00
public boolean hasMoreElements ( ) {
2006-11-20 03:46:53 +01:00
return nextSeed ! = null ;
}
2005-04-07 21:19:42 +02:00
private yacySeed nextInternal ( ) {
yacySeed s ;
while ( se . hasMoreElements ( ) ) {
s = ( yacySeed ) se . nextElement ( ) ;
2006-11-20 03:46:53 +01:00
if ( s = = null ) return null ;
2005-07-08 17:17:50 +02:00
s . available = seedCrawlReady . getScore ( s . hash ) ;
2005-04-07 21:19:42 +02:00
if ( available ) {
if ( seedCrawlReady . getScore ( s . hash ) < yacyCore . yacyTime ( ) ) return s ;
} else {
if ( seedCrawlReady . getScore ( s . hash ) > yacyCore . yacyTime ( ) ) return s ;
}
}
return null ;
}
2006-11-20 03:46:53 +01:00
public Object nextElement ( ) {
2005-04-07 21:19:42 +02:00
yacySeed next = nextSeed ;
nextSeed = nextInternal ( ) ;
return next ;
2006-11-20 03:46:53 +01:00
}
2005-04-07 21:19:42 +02:00
}
2007-04-26 11:51:51 +02:00
public synchronized yacySeed getGlobalCrawlSeed ( String urlHash ) {
2005-04-07 21:19:42 +02:00
Enumeration e = getAcceptRemoteCrawlSeeds ( urlHash , true ) ;
yacySeed seed ;
if ( e . hasMoreElements ( ) ) seed = ( yacySeed ) e . nextElement ( ) ; else seed = null ;
e = null ;
return seed ;
}
2007-04-26 11:51:51 +02:00
// Takes the first hash delivered by a rotating iterator over clusterhashes
// that is started at urlHash and looks it up among the connected seeds.
// Returns null if the iterator delivers nothing; otherwise whatever
// seedDB.getConnected yields for that hash.
public synchronized yacySeed getPublicClusterCrawlSeed(String urlHash, TreeSet clusterhashes) {
    final kelondroCloneableSetIterator fromUrlHash = new kelondroCloneableSetIterator(clusterhashes, urlHash);
    final kelondroCloneableIterator rotation = new kelondroRotateIterator(fromUrlHash, null);
    if (!rotation.hasNext()) {
        return null;
    }
    final String clusterPeerHash = (String) rotation.next();
    return seedDB.getConnected(clusterPeerHash);
}
2005-04-07 21:19:42 +02:00
// Stores newYacyTime as the crawl-ready score of the given seed.
// A value that lies before the current yacyTime is replaced by the current yacyTime.
public void setCrawlTime(String seedHash, int newYacyTime) {
    final int effectiveTime = (newYacyTime < yacyCore.yacyTime()) ? yacyCore.yacyTime() : newYacyTime;
    seedCrawlReady.setScore(seedHash, effectiveTime);
}
// Sets the crawl-ready score of the given seed to the current yacyTime plus newDelay.
public void setCrawlDelay(String seedHash, int newDelay) {
    final int readyAt = yacyCore.yacyTime() + newDelay;
    seedCrawlReady.setScore(seedHash, readyAt);
}
public void processPeerArrival ( yacySeed peer , boolean direct ) {
if ( peer . getVersion ( ) > = ( ( float ) 0 . 3 ) ) {
if ( ! ( seedCrawlReady . existsScore ( peer . hash ) ) ) seedCrawlReady . setScore ( peer . hash , yacyCore . yacyTime ( ) ) ;
} else {
seedCrawlReady . deleteScore ( peer . hash ) ;
}
}
// Called when a peer departs: its entry is removed from the crawl-ready table.
public void processPeerDeparture(yacySeed peer) {
    final String departedHash = peer.hash;
    seedCrawlReady.deleteScore(departedHash);
}
// Called when a peer pings; this action does nothing on a ping.
public void processPeerPing(yacySeed peer) {
    // intentionally empty
}
2005-08-14 02:57:30 +02:00
2006-09-14 02:51:02 +02:00
public static boolean shallBeOwnWord ( String wordhash ) {
2006-12-05 03:47:51 +01:00
if ( yacyCore . seedDB = = null ) return false ;
2006-09-14 11:28:17 +02:00
if ( yacyCore . seedDB . mySeed . isPotential ( ) ) return false ;
2006-09-14 02:51:02 +02:00
final double distance = dhtDistance ( yacyCore . seedDB . mySeed . hash , wordhash ) ;
final double max = 1 . 2 / yacyCore . seedDB . sizeConnected ( ) ;
//System.out.println("Distance for " + wordhash + ": " + distance + "; max is " + max);
return ( distance > 0 ) & & ( distance < = max ) ;
}
2005-08-14 02:57:30 +02:00
public static double dhtDistance ( String peer , String word ) {
// the dht distance is a positive value between 0 and 1
// if the distance is small, the word more probably belongs to the peer
double d = hashDistance ( peer , word ) ;
if ( d > 0 ) {
return d ; // case where the word is 'before' the peer
} else {
2006-12-14 03:48:37 +01:00
return ( ( double ) 1 ) + d ; // wrap-around case
2005-08-14 02:57:30 +02:00
}
}
private static double hashDistance ( String from , String to ) {
// computes the distance between two hashes.
// the maximum distance between two hashes is 1, the minimum -1
// this can be used like "from - to"
// the result is positive if from > to
2006-12-14 03:48:37 +01:00
assert ( from ! = null ) ;
assert ( to ! = null ) ;
2006-12-17 23:52:58 +01:00
assert ( from . length ( ) = = 12 ) : " from.length = " + from . length ( ) + " , from = " + from ;
assert ( to . length ( ) = = 12 ) : " to.length = " + to . length ( ) + " , to = " + to ;
2006-12-14 03:48:37 +01:00
return ( ( double ) ( kelondroBase64Order . enhancedCoder . cardinal ( from . getBytes ( ) ) - kelondroBase64Order . enhancedCoder . cardinal ( to . getBytes ( ) ) ) ) / ( ( double ) Long . MAX_VALUE ) ;
2005-08-14 02:57:30 +02:00
}
2006-12-14 03:48:37 +01:00
public synchronized ArrayList /* of yacySeed */ getDHTTargets ( serverLog log , int primaryPeerCount , int reservePeerCount , String firstKey , String lastKey , double maxDist ) {
2006-02-20 00:47:45 +01:00
// find a list of DHT-peers
2006-12-14 03:48:37 +01:00
assert
! ( kelondroBase64Order . enhancedCoder . cardinal ( firstKey . getBytes ( ) ) < kelondroBase64Order . enhancedCoder . cardinal ( yacyCore . seedDB . mySeed . hash . getBytes ( ) ) & &
kelondroBase64Order . enhancedCoder . cardinal ( lastKey . getBytes ( ) ) > kelondroBase64Order . enhancedCoder . cardinal ( yacyCore . seedDB . mySeed . hash . getBytes ( ) ) ) ;
ArrayList seeds = new ArrayList ( ) ;
yacySeed seed ;
2006-02-20 00:47:45 +01:00
double ownDistance = Math . min ( yacyDHTAction . dhtDistance ( yacyCore . seedDB . mySeed . hash , firstKey ) , yacyDHTAction . dhtDistance ( yacyCore . seedDB . mySeed . hash , lastKey ) ) ;
double maxDistance = Math . min ( ownDistance , maxDist ) ;
double avdist ;
Enumeration e = this . getAcceptRemoteIndexSeeds ( lastKey ) ;
2007-03-22 10:08:38 +01:00
Hashtable peerFilter = new Hashtable ( ) ;
2006-12-14 03:48:37 +01:00
while ( ( e . hasMoreElements ( ) ) & & ( seeds . size ( ) < ( primaryPeerCount + reservePeerCount ) ) ) {
seed = ( yacySeed ) e . nextElement ( ) ;
if ( seeds ! = null ) {
avdist = Math . max ( yacyDHTAction . dhtDistance ( seed . hash , firstKey ) , yacyDHTAction . dhtDistance ( seed . hash , lastKey ) ) ;
2007-03-22 10:08:38 +01:00
if ( avdist < maxDistance & & ! peerFilter . containsKey ( seed . hash ) ) {
2006-12-14 03:48:37 +01:00
if ( log ! = null ) log . logInfo ( " Selected " + ( ( seeds . size ( ) < primaryPeerCount ) ? " primary " : " reserve " ) + " DHT target peer " + seed . getName ( ) + " : " + seed . hash + " , distance = " + avdist ) ;
seeds . add ( seed ) ;
2007-03-22 10:08:38 +01:00
peerFilter . put ( seed . hash , seed ) ;
2006-02-20 00:47:45 +01:00
}
}
}
e = null ; // finish enumeration
2006-12-14 03:48:37 +01:00
return seeds ;
2006-02-20 00:47:45 +01:00
}
2005-04-07 21:19:42 +02:00
}