2005-05-07 23:11:18 +02:00
// yacyClient.java
2005-04-07 21:19:42 +02:00
// -------------------------------------
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
2005-09-21 23:32:43 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.yacy ;
2005-05-05 07:32:19 +02:00
import java.io.IOException ;
2006-01-10 17:48:59 +01:00
import java.io.UnsupportedEncodingException ;
2005-10-05 12:45:33 +02:00
import java.util.ArrayList ;
2005-05-05 07:32:19 +02:00
import java.util.HashMap ;
2005-10-13 15:57:15 +02:00
import java.util.Iterator ;
2006-09-30 00:27:20 +02:00
import java.util.Map ;
import java.util.TreeMap ;
2006-01-30 01:42:38 +01:00
2007-07-05 01:48:52 +02:00
import de.anomic.http.httpRemoteProxyConfig ;
2005-05-05 07:32:19 +02:00
import de.anomic.http.httpc ;
2006-05-28 03:09:31 +02:00
import de.anomic.index.indexContainer ;
2006-11-08 17:17:47 +01:00
import de.anomic.index.indexRWIEntry ;
2008-01-22 12:51:43 +01:00
import de.anomic.index.indexRWIRowEntry ;
2006-08-02 22:01:59 +02:00
import de.anomic.index.indexURLEntry ;
2006-01-04 01:39:00 +01:00
import de.anomic.kelondro.kelondroBase64Order ;
2006-11-23 03:16:30 +01:00
import de.anomic.kelondro.kelondroBitfield ;
import de.anomic.plasma.plasmaCondenser ;
2007-11-07 23:38:09 +01:00
import de.anomic.plasma.plasmaSearchRankingProcess ;
2006-02-26 12:30:37 +01:00
import de.anomic.plasma.plasmaSearchRankingProfile ;
2005-07-06 16:48:41 +02:00
import de.anomic.plasma.plasmaSnippetCache ;
2005-05-05 07:32:19 +02:00
import de.anomic.plasma.plasmaSwitchboard ;
2006-11-19 21:05:25 +01:00
import de.anomic.plasma.plasmaWordIndex ;
2006-08-12 16:28:14 +02:00
import de.anomic.plasma.urlPattern.plasmaURLPattern ;
2006-09-11 12:39:25 +02:00
import de.anomic.server.serverByteBuffer ;
2006-08-12 16:28:14 +02:00
import de.anomic.server.serverCodings ;
2005-05-05 07:32:19 +02:00
import de.anomic.server.serverCore ;
2007-07-24 02:46:17 +02:00
import de.anomic.server.serverDomains ;
2005-05-05 07:32:19 +02:00
import de.anomic.server.serverObjects ;
import de.anomic.tools.crypt ;
import de.anomic.tools.nxTools ;
2007-11-29 03:07:37 +01:00
import de.anomic.xml.rssReader ;
2005-04-07 21:19:42 +02:00
2005-10-05 12:45:33 +02:00
public final class yacyClient {
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
public static int publishMySeed ( String address , String otherHash ) {
2005-05-07 23:11:18 +02:00
// this is called to enrich the seed information by
// - own address (if peer is behind a nat/router)
// - check peer type (virgin/junior/senior/principal)
// to do this, we send a 'Hello' to another peer
// this carries the following information:
// 'iam' - own hash
// 'youare' - remote hash, to verify that we are correct
// 'key' - a session key that the remote peer may use to answer
// and the own seed string
// we expect the following information to be send back:
// - 'yourip' the ip of the connection peer (we)
// - 'yourtype' the type of this peer that the other peer checked by asking for a specific word
// and the remote seed string
// the number of new seeds are returned
// one exceptional failure case is when we know the other's peers hash, the other peers responds correctly
// but they appear to be another peer by comparisment of the other peer's hash
// this works of course only if we know the other peer's hash.
2008-01-17 19:43:01 +01:00
HashMap < String , String > result = null ;
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , null ) ;
2007-09-12 02:42:53 +02:00
for ( int retry = 0 ; retry < 3 ; retry + + ) try {
2007-07-05 01:48:52 +02:00
// generate request
2007-10-24 23:38:19 +02:00
post . put ( " count " , " 20 " ) ;
post . put ( " seed " , yacyCore . seedDB . mySeed ( ) . genSeedStr ( post . get ( " key " , " " ) ) ) ;
2007-09-04 01:43:55 +02:00
yacyCore . log . logFine ( " yacyClient.publishMySeed thread ' " + Thread . currentThread ( ) . getName ( ) + " ' contacting peer at " + address ) ;
2007-07-05 01:48:52 +02:00
// send request
2005-10-22 15:28:04 +02:00
result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + address + " /yacy/hello.html " , null ) ,
2006-06-21 16:25:27 +02:00
yacySeed . b64Hash2hexHash ( otherHash ) + " .yacyh " ,
2007-06-16 01:47:08 +02:00
12000 ,
2005-10-22 15:28:04 +02:00
null ,
null ,
2007-07-05 01:48:52 +02:00
proxyConfig ( ) ,
2007-09-11 17:27:01 +02:00
post ,
2005-11-11 00:48:20 +01:00
null
2006-12-01 12:01:56 +01:00
) , " UTF-8 "
2005-10-22 15:28:04 +02:00
) ;
2007-09-12 02:42:53 +02:00
break ;
2005-05-07 23:11:18 +02:00
} catch ( Exception e ) {
2005-07-07 15:58:54 +02:00
if ( Thread . currentThread ( ) . isInterrupted ( ) ) {
2005-08-30 23:10:39 +02:00
yacyCore . log . logFine ( " yacyClient.publishMySeed thread ' " + Thread . currentThread ( ) . getName ( ) + " ' interrupted. " ) ;
2007-09-12 02:42:53 +02:00
return - 1 ;
2005-07-07 15:58:54 +02:00
} else {
2007-09-12 02:42:53 +02:00
yacyCore . log . logFine ( " yacyClient.publishMySeed thread ' " + Thread . currentThread ( ) . getName ( ) + " ' exception: " + e . getMessage ( ) + " ; retry = " + retry ) ; // here VERY OFTEN a 'Connection reset' appears. What is the cause?
// try again (go into loop)
2005-07-07 15:58:54 +02:00
}
2007-09-12 02:42:53 +02:00
result = null ;
2005-05-07 23:11:18 +02:00
}
2007-09-12 02:42:53 +02:00
2005-09-21 23:32:43 +02:00
if ( result = = null | | result . size ( ) < 3 ) {
2005-08-30 23:10:39 +02:00
yacyCore . log . logFine ( " yacyClient.publishMySeed result error: " +
2005-05-07 23:11:18 +02:00
( ( result = = null ) ? " result null " : ( " result= " + result . toString ( ) ) ) ) ;
return - 1 ;
}
2005-09-21 23:32:43 +02:00
2005-05-07 23:11:18 +02:00
// check consistency with expectation
2005-06-23 13:00:26 +02:00
yacySeed otherPeer = null ;
2005-06-16 09:28:07 +02:00
float otherPeerVersion = 0 ;
2007-06-13 15:21:19 +02:00
String seed ;
if ( ( otherHash ! = null ) & &
( otherHash . length ( ) > 0 ) & &
( ( seed = ( String ) result . get ( " seed0 " ) ) ! = null ) ) {
2007-06-13 00:08:33 +02:00
if ( seed . length ( ) > yacySeed . maxsize ) {
yacyCore . log . logInfo ( " hello/client 0: rejected contacting seed; too large ( " + seed . length ( ) + " > " + yacySeed . maxsize + " ) " ) ;
} else {
2007-09-11 17:27:01 +02:00
otherPeer = yacySeed . genRemoteSeed ( seed , post . get ( " key " , " " ) , true ) ;
2007-06-13 00:08:33 +02:00
if ( otherPeer = = null | | ! otherPeer . hash . equals ( otherHash ) ) {
yacyCore . log . logFine ( " yacyClient.publishMySeed: consistency error: other peer ' " + ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) ) + " ' wrong " ) ;
return - 1 ; // no success
}
otherPeerVersion = otherPeer . getVersion ( ) ;
2005-05-07 23:11:18 +02:00
}
}
2005-09-21 23:32:43 +02:00
2005-05-07 23:11:18 +02:00
// set my own seed according to new information
2005-06-16 13:40:36 +02:00
// we overwrite our own IP number only, if we do not portForwarding
2006-02-28 13:31:57 +01:00
if ( serverCore . portForwardingEnabled | | serverCore . useStaticIP ) {
2007-10-01 14:30:23 +02:00
yacyCore . seedDB . mySeed ( ) . put ( yacySeed . IP , serverDomains . myPublicIP ( ) ) ;
2005-09-21 23:32:43 +02:00
} else {
2007-10-01 14:30:23 +02:00
yacyCore . seedDB . mySeed ( ) . put ( yacySeed . IP , ( String ) result . get ( " yourip " ) ) ;
2005-06-16 09:28:07 +02:00
}
2005-09-21 23:32:43 +02:00
2005-06-16 09:28:07 +02:00
/ * If we have port forwarding enabled but the other peer uses a too old yacy version
* we can ignore the seed - type that was reported by the peer .
*
* Otherwise we have to change our seed - type
*
* @see serverCore # portForwardingEnabled
* /
2005-09-22 12:30:55 +02:00
if ( ! serverCore . portForwardingEnabled | | otherPeerVersion > = yacyVersion . YACY_SUPPORTS_PORT_FORWARDING ) {
2005-10-17 17:46:12 +02:00
String mytype = ( String ) result . get ( yacySeed . YOURTYPE ) ;
2007-09-11 17:27:01 +02:00
if ( mytype = = null ) { mytype = " " ; }
2006-01-23 00:14:37 +01:00
yacyAccessible accessible = new yacyAccessible ( ) ;
2006-01-25 02:50:24 +01:00
if ( mytype . equals ( yacySeed . PEERTYPE_SENIOR ) | | mytype . equals ( yacySeed . PEERTYPE_PRINCIPAL ) ) {
2006-01-23 00:14:37 +01:00
accessible . IWasAccessed = true ;
2007-10-01 14:30:23 +02:00
if ( yacyCore . seedDB . mySeed ( ) . isPrincipal ( ) ) {
2006-01-23 00:14:37 +01:00
mytype = yacySeed . PEERTYPE_PRINCIPAL ;
}
} else {
accessible . IWasAccessed = false ;
2005-07-04 13:09:48 +02:00
}
2006-01-23 00:14:37 +01:00
accessible . lastUpdated = System . currentTimeMillis ( ) ;
yacyCore . amIAccessibleDB . put ( otherHash , accessible ) ;
2005-09-21 23:32:43 +02:00
2005-07-04 13:09:48 +02:00
/ *
* If we were reported as junior we have to check if your port forwarding channel is broken
* If this is true we try to reconnect the sch channel to the remote server now .
* /
if ( mytype . equalsIgnoreCase ( yacySeed . PEERTYPE_JUNIOR ) ) {
2005-06-23 13:00:26 +02:00
yacyCore . log . logInfo ( " yacyClient.publishMySeed: Peer ' " + ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) ) + " ' reported us as junior. " ) ;
2005-07-04 13:09:48 +02:00
if ( serverCore . portForwardingEnabled ) {
2005-09-21 23:32:43 +02:00
if ( ! Thread . currentThread ( ) . isInterrupted ( ) & &
serverCore . portForwarding ! = null & &
! serverCore . portForwarding . isConnected ( )
2005-07-04 13:09:48 +02:00
) {
yacyCore . log . logWarning ( " yacyClient.publishMySeed: Broken portForwarding channel detected. Trying to reconnect ... " ) ;
try {
serverCore . portForwarding . reconnect ( ) ;
} catch ( IOException e ) {
yacyCore . log . logWarning ( " yacyClient.publishMySeed: Unable to reconnect to port forwarding host. " ) ;
}
}
}
2007-09-11 17:27:01 +02:00
} else if ( ( mytype . equalsIgnoreCase ( yacySeed . PEERTYPE_SENIOR ) ) | |
( mytype . equalsIgnoreCase ( yacySeed . PEERTYPE_PRINCIPAL ) ) ) {
yacyCore . log . logFine ( " yacyClient.publishMySeed: Peer ' " + ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) ) + " ' reported us as " + mytype + " , accepted other peer. " ) ;
2005-08-29 13:39:10 +02:00
} else {
2007-09-11 17:27:01 +02:00
// wrong type report
yacyCore . log . logFine ( " yacyClient.publishMySeed: Peer ' " + ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) ) + " ' reported us as " + mytype + " , rejecting other peer. " ) ;
return - 1 ;
2005-06-23 13:00:26 +02:00
}
2007-10-01 14:30:23 +02:00
if ( yacyCore . seedDB . mySeed ( ) . orVirgin ( ) . equals ( yacySeed . PEERTYPE_VIRGIN ) )
yacyCore . seedDB . mySeed ( ) . put ( yacySeed . PEERTYPE , mytype ) ;
2005-06-16 09:28:07 +02:00
}
2005-09-21 23:32:43 +02:00
2007-10-01 14:30:23 +02:00
final String error = yacyCore . seedDB . mySeed ( ) . isProper ( ) ;
2005-09-21 23:32:43 +02:00
if ( error ! = null ) {
2007-10-01 14:30:23 +02:00
yacyCore . log . logSevere ( " yacyClient.publishMySeed mySeed error - not proper: " + error ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-09-21 23:32:43 +02:00
2005-10-17 17:46:12 +02:00
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
2007-10-19 17:31:38 +02:00
2005-05-07 23:11:18 +02:00
// read the seeds that the peer returned and integrate them into own database
2005-04-07 21:19:42 +02:00
int i = 0 ;
2005-05-07 23:11:18 +02:00
int count = 0 ;
2005-09-21 23:32:43 +02:00
String seedStr ;
2007-02-02 15:52:54 +01:00
while ( ( seedStr = ( String ) result . get ( " seed " + i + + ) ) ! = null ) {
2005-05-07 23:11:18 +02:00
// integrate new seed into own database
// the first seed, "seed0" is the seed of the responding peer
2007-06-13 00:08:33 +02:00
if ( seedStr . length ( ) > yacySeed . maxsize ) {
yacyCore . log . logInfo ( " hello/client: rejected contacting seed; too large ( " + seedStr . length ( ) + " > " + yacySeed . maxsize + " ) " ) ;
} else {
2007-09-11 17:27:01 +02:00
if ( yacyCore . peerActions . peerArrival ( yacySeed . genRemoteSeed ( seedStr , post . get ( " key " , " " ) , true ) , ( i = = 1 ) ) ) count + + ;
2007-06-13 00:08:33 +02:00
}
2005-05-07 23:11:18 +02:00
}
return count ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
public static yacySeed querySeed ( yacySeed target , String seedHash ) {
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " object " , " seed " ) ;
post . put ( " env " , seedHash ) ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// send request
try {
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + target . getClusterAddress ( ) + " /yacy/query.html " , null ) ,
2007-07-05 01:48:52 +02:00
target . getHexHash ( ) + " .yacyh " ,
8000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
) ;
2005-10-22 15:28:04 +02:00
2005-09-21 23:32:43 +02:00
if ( result = = null | | result . size ( ) = = 0 ) { return null ; }
2005-10-17 17:46:12 +02:00
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
2007-09-11 17:27:01 +02:00
return yacySeed . genRemoteSeed ( ( String ) result . get ( " response " ) , post . get ( " key " , " " ) , true ) ;
2005-04-07 21:19:42 +02:00
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.querySeed error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
public static int queryRWICount ( yacySeed target , String wordHash ) {
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " object " , " rwicount " ) ;
post . put ( " ttl " , " 0 " ) ;
post . put ( " env " , wordHash ) ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// send request
try {
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + target . getClusterAddress ( ) + " /yacy/query.html " , null ) ,
2007-07-05 01:48:52 +02:00
target . getHexHash ( ) + " .yacyh " ,
8000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
) ;
2005-10-22 15:28:04 +02:00
2005-09-21 23:32:43 +02:00
if ( result = = null | | result . size ( ) = = 0 ) { return - 1 ; }
2005-05-07 23:11:18 +02:00
return Integer . parseInt ( ( String ) result . get ( " response " ) ) ;
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.queryRWICount error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2005-10-22 15:28:04 +02:00
public static int queryUrlCount ( yacySeed target ) {
2005-09-21 23:32:43 +02:00
if ( target = = null ) { return - 1 ; }
2007-10-01 14:30:23 +02:00
if ( yacyCore . seedDB . mySeed ( ) = = null ) return - 1 ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " object " , " lurlcount " ) ;
post . put ( " ttl " , " 0 " ) ;
post . put ( " env " , " " ) ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-05-07 23:11:18 +02:00
try {
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + target . getClusterAddress ( ) + " /yacy/query.html " , null ) ,
2007-07-05 01:48:52 +02:00
target . getHexHash ( ) + " .yacyh " ,
6000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
) ;
2005-10-22 15:28:04 +02:00
2005-05-07 23:11:18 +02:00
if ( ( result = = null ) | | ( result . size ( ) = = 0 ) ) return - 1 ;
2005-09-21 23:32:43 +02:00
final String resp = ( String ) result . get ( " response " ) ;
2006-09-07 03:13:03 +02:00
if ( resp = = null ) {
return - 1 ;
} else try {
return Integer . parseInt ( resp ) ;
} catch ( NumberFormatException e ) {
return - 1 ;
}
2006-05-10 15:57:31 +02:00
} catch ( IOException e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.queryUrlCount error asking peer ' " + target . getName ( ) + " ': " + e . toString ( ) ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2007-11-29 03:07:37 +01:00
public static rssReader queryRemoteCrawlURLs ( yacySeed target , int count ) {
// returns a list of
if ( target = = null ) { return null ; }
if ( yacyCore . seedDB . mySeed ( ) = = null ) return null ;
// prepare request
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
post . put ( " call " , " remotecrawl " ) ;
post . put ( " count " , count ) ;
// send request
try {
final byte [ ] result =
httpc . wput ( new yacyURL ( " http:// " + target . getClusterAddress ( ) + " /yacy/urls.xml " , null ) ,
target . getHexHash ( ) + " .yacyh " ,
60000 , /* a long time-out is needed */
null ,
null ,
proxyConfig ( ) ,
post ,
null
) ;
rssReader reader = rssReader . parse ( result ) ;
if ( reader = = null ) {
// case where the rss reader does not understand the content
yacyCore . log . logWarning ( " yacyClient.queryRemoteCrawlURLs failed asking peer ' " + target . getName ( ) + " ': probably bad response from remote peer " ) ;
System . out . println ( " ***DEBUG*** rss input = " + new String ( result ) ) ;
target . put ( yacySeed . RCOUNT , " 0 " ) ;
yacyCore . seedDB . update ( target . hash , target ) ; // overwrite number of remote-available number to avoid that this peer is called again (until update is done by peer ping)
//e.printStackTrace();
return null ;
}
return reader ;
} catch ( IOException e ) {
yacyCore . log . logSevere ( " yacyClient.queryRemoteCrawlURLs error asking peer ' " + target . getName ( ) + " ': " + e . toString ( ) ) ;
return null ;
}
}
2006-10-04 00:55:59 +02:00
public static String [ ] search (
2006-01-30 01:42:38 +01:00
String wordhashes ,
2007-04-10 14:27:03 +02:00
String excludehashes ,
2006-09-13 19:13:28 +02:00
String urlhashes ,
2006-04-20 12:15:00 +02:00
String prefer ,
String filter ,
2007-09-04 01:43:55 +02:00
int count ,
2006-01-30 01:42:38 +01:00
int maxDistance ,
2005-11-08 13:14:51 +01:00
boolean global ,
2007-02-01 14:27:23 +01:00
int partitions ,
2007-07-05 01:48:52 +02:00
yacySeed target ,
2006-11-19 21:05:25 +01:00
plasmaWordIndex wordIndex ,
2007-11-07 23:38:09 +01:00
plasmaSearchRankingProcess containerCache ,
2008-01-17 19:43:01 +01:00
Map < String , TreeMap < String , String > > abstractCache ,
2007-08-15 13:36:59 +02:00
plasmaURLPattern blacklist ,
2006-11-23 03:16:30 +01:00
plasmaSearchRankingProfile rankingProfile ,
kelondroBitfield constraint
2005-11-08 13:14:51 +01:00
) {
2005-05-07 23:11:18 +02:00
// send a search request to peer with remote Hash
// this mainly converts the words into word hashes
2005-09-21 23:32:43 +02:00
2005-05-07 23:11:18 +02:00
// INPUT:
2007-02-01 14:27:23 +01:00
// iam : complete seed of the requesting peer
// youare : seed hash of the target peer, used for testing network stability
// key : transmission key for response
// search : a list of search words
// hsearch : a string of word hashes
// fwdep : forward depth. if "0" then peer may NOT ask another peer for more results
// fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping
// count : maximum number of wanted results
// global : if "true", then result may consist of answers from other peers
// partitions : number of remote peers that are asked (for evaluation of QPM)
// duetime : maximum time that a peer should spent to create a result
2005-09-21 23:32:43 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " myseed " , yacyCore . seedDB . mySeed ( ) . genSeedStr ( post . get ( " key " , " " ) ) ) ;
2007-09-04 01:43:55 +02:00
post . put ( " count " , Math . max ( 10 , count ) ) ;
2007-10-24 23:38:19 +02:00
post . put ( " resource " , ( ( global ) ? " global " : " local " ) ) ;
2007-07-05 01:48:52 +02:00
post . put ( " partitions " , partitions ) ;
2007-10-24 23:38:19 +02:00
post . put ( " query " , wordhashes ) ;
post . put ( " exclude " , excludehashes ) ;
post . put ( " urls " , urlhashes ) ;
post . put ( " prefer " , prefer ) ;
post . put ( " filter " , filter ) ;
post . put ( " ttl " , " 0 " ) ;
2007-07-05 01:48:52 +02:00
post . put ( " maxdist " , maxDistance ) ;
2007-10-24 23:38:19 +02:00
post . put ( " profile " , crypt . simpleEncode ( rankingProfile . toExternalString ( ) ) ) ;
2007-11-22 00:14:57 +01:00
post . put ( " constraint " , ( constraint = = null ) ? " " : constraint . exportB64 ( ) ) ;
2007-10-24 23:38:19 +02:00
if ( abstractCache ! = null ) post . put ( " abstracts " , " auto " ) ;
2007-07-05 01:48:52 +02:00
final long timestamp = System . currentTimeMillis ( ) ;
2007-10-19 17:31:38 +02:00
2007-07-05 01:48:52 +02:00
// send request
2008-01-17 19:43:01 +01:00
HashMap < String , String > result = null ;
2007-07-05 01:48:52 +02:00
try {
result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + target . getClusterAddress ( ) + " /yacy/search.html " , null ) ,
2007-07-05 01:48:52 +02:00
target . getHexHash ( ) + " .yacyh " ,
60000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
2006-12-01 12:01:56 +01:00
) , " UTF-8 "
2007-07-05 01:48:52 +02:00
) ;
} catch ( IOException e ) {
yacyCore . log . logFine ( " SEARCH failed FROM " + target . hash + " : " + target . getName ( ) + " ( " + e . getMessage ( ) + " ), score= " + target . selectscore + " , DHTdist= " + yacyDHTAction . dhtDistance ( target . hash , wordhashes . substring ( 0 , 12 ) ) ) ;
2007-09-12 02:42:53 +02:00
yacyCore . peerActions . peerDeparture ( target , " search request to peer created io exception: " + e . getMessage ( ) ) ;
2007-07-05 01:48:52 +02:00
return null ;
}
2005-10-23 19:50:27 +02:00
2007-07-05 01:48:52 +02:00
if ( ( result = = null ) | | ( result . size ( ) = = 0 ) ) {
yacyCore . log . logFine ( " SEARCH failed FROM "
+ target . hash
+ " : "
+ target . getName ( )
+ " (zero response), score= "
+ target . selectscore
+ " , DHTdist= "
+ yacyDHTAction . dhtDistance ( target . hash , wordhashes
. substring ( 0 , 12 ) ) ) ;
return null ;
}
2005-09-21 23:32:43 +02:00
2007-07-05 01:48:52 +02:00
// compute all computation times
final long totalrequesttime = System . currentTimeMillis ( ) - timestamp ;
// OUTPUT:
// version : application version of responder
// uptime : uptime in seconds of responder
// total : number of total available LURL's for this search
// count : number of returned LURL's for this search
// resource<n> : LURL of search
// fwhop : hops (depth) of forwards that had been performed to construct this result
// fwsrc : peers that helped to construct this result
// fwrec : peers that would have helped to construct this result (recommendations)
// searchtime : time that the peer actually spent to create the result
// references : references (search hints) that was calculated during search
// now create a plasmaIndex out of this result
// System.out.println("yacyClient: " + ((urlhashes.length() == 0) ? "primary" : "secondary")+ " search result = " + result.toString()); // debug
2008-01-30 22:58:30 +01:00
int results = 0 , joincount = 0 ;
2007-10-03 17:45:12 +02:00
try {
2008-01-30 22:58:30 +01:00
results = Integer . parseInt ( result . get ( " count " ) ) ;
joincount = Integer . parseInt ( result . get ( " joincount " ) ) ;
2007-10-03 17:45:12 +02:00
} catch ( NumberFormatException e ) {
yacyCore . log . logFine ( " SEARCH failed FROM " + target . hash + " : " + target . getName ( ) + " , wrong output format " ) ;
yacyCore . peerActions . peerDeparture ( target , " search request to peer created number format exception " ) ;
return null ;
}
2007-07-05 01:48:52 +02:00
// System.out.println("***result count " + results);
2005-09-21 23:32:43 +02:00
2007-07-05 01:48:52 +02:00
// create containers
final int words = wordhashes . length ( ) / yacySeedDB . commonHashLength ;
indexContainer [ ] container = new indexContainer [ words ] ;
for ( int i = 0 ; i < words ; i + + ) {
2007-09-04 01:43:55 +02:00
container [ i ] = plasmaWordIndex . emptyContainer ( wordhashes . substring ( i * yacySeedDB . commonHashLength , ( i + 1 ) * yacySeedDB . commonHashLength ) , count ) ;
2007-07-05 01:48:52 +02:00
}
2007-06-22 16:29:14 +02:00
2007-07-05 01:48:52 +02:00
// insert results to containers
indexURLEntry urlEntry ;
String [ ] urls = new String [ results ] ;
for ( int n = 0 ; n < results ; n + + ) {
// get one single search result
2007-08-15 13:36:59 +02:00
urlEntry = wordIndex . loadedURL . newEntry ( ( String ) result . get ( " resource " + n ) ) ;
2007-07-05 01:48:52 +02:00
if ( urlEntry = = null ) continue ;
assert ( urlEntry . hash ( ) . length ( ) = = 12 ) : " urlEntry.hash() = " + urlEntry . hash ( ) ;
if ( urlEntry . hash ( ) . length ( ) ! = 12 ) continue ; // bad url hash
indexURLEntry . Components comp = urlEntry . comp ( ) ;
if ( blacklist . isListed ( plasmaURLPattern . BLACKLIST_SEARCH , comp . url ( ) ) ) {
yacyCore . log . logInfo ( " remote search (client): filtered blacklisted url " + comp . url ( ) + " from peer " + target . getName ( ) ) ;
continue ; // block with backlist
}
2007-07-24 02:46:17 +02:00
if ( ! plasmaSwitchboard . getSwitchboard ( ) . acceptURL ( comp . url ( ) ) ) {
yacyCore . log . logInfo ( " remote search (client): rejected url outside of our domain " + comp . url ( ) + " from peer " + target . getName ( ) ) ;
continue ; // reject url outside of our domain
}
2006-12-05 03:47:51 +01:00
2007-07-05 01:48:52 +02:00
// save the url entry
indexRWIEntry entry ;
if ( urlEntry . word ( ) = = null ) {
yacyCore . log . logWarning ( " remote search (client): no word attached from peer " + target . getName ( ) + " , version " + target . getVersion ( ) ) ;
continue ; // no word attached
}
2005-09-21 23:32:43 +02:00
2007-08-15 13:36:59 +02:00
// the search-result-url transports all the attributes of word indexes
2007-07-05 01:48:52 +02:00
entry = urlEntry . word ( ) ;
if ( ! ( entry . urlHash ( ) . equals ( urlEntry . hash ( ) ) ) ) {
yacyCore . log . logInfo ( " remote search (client): url-hash " + urlEntry . hash ( ) + " does not belong to word-attached-hash " + entry . urlHash ( ) + " ; url = " + comp . url ( ) + " from peer " + target . getName ( ) ) ;
continue ; // spammed
}
2005-05-07 23:11:18 +02:00
2007-07-05 01:48:52 +02:00
// passed all checks, store url
try {
2007-08-15 13:36:59 +02:00
wordIndex . loadedURL . store ( urlEntry ) ;
2007-10-01 14:30:23 +02:00
wordIndex . loadedURL . stack ( urlEntry , yacyCore . seedDB . mySeed ( ) . hash , target . hash , 2 ) ;
2007-07-05 01:48:52 +02:00
} catch ( IOException e ) {
yacyCore . log . logSevere ( " could not store search result " , e ) ;
continue ; // db-error
}
if ( urlEntry . snippet ( ) ! = null ) {
2007-08-15 13:36:59 +02:00
// we don't store the snippets along the url entry,
// because they are search-specific.
2007-07-05 01:48:52 +02:00
// instead, they are placed in a snipped-search cache.
2007-08-15 13:36:59 +02:00
// System.out.println("--- RECEIVED SNIPPET '" + link.snippet() + "'");
plasmaSnippetCache . storeToCache ( wordhashes , urlEntry . hash ( ) , urlEntry . snippet ( ) ) ;
2007-07-05 01:48:52 +02:00
}
2007-09-04 01:43:55 +02:00
2007-07-05 01:48:52 +02:00
// add the url entry to the word indexes
for ( int m = 0 ; m < words ; m + + ) {
container [ m ] . add ( entry , System . currentTimeMillis ( ) ) ;
}
2007-09-04 01:43:55 +02:00
2007-07-05 01:48:52 +02:00
// store url hash for statistics
urls [ n ] = urlEntry . hash ( ) ;
}
2007-09-04 01:43:55 +02:00
// store remote result to local result container
synchronized ( containerCache ) {
// insert one container into the search result buffer
2008-01-30 22:58:30 +01:00
containerCache . insertRanked ( container [ 0 ] , false , joincount ) ; // one is enough
2007-09-04 01:43:55 +02:00
// integrate remote topwords
String references = ( String ) result . get ( " references " ) ;
2007-09-08 13:50:19 +02:00
yacyCore . log . logInfo ( " remote search (client): peer " + target . getName ( ) + " sent references " + references ) ;
if ( references ! = null ) {
// add references twice, so they can be countet (must have at least 2 entries)
containerCache . addReferences ( references . split ( " , " ) ) ;
containerCache . addReferences ( references . split ( " , " ) ) ;
}
2007-09-04 01:43:55 +02:00
}
2007-07-05 01:48:52 +02:00
// insert the containers to the index
2007-09-04 01:43:55 +02:00
for ( int m = 0 ; m < words ; m + + ) {
2007-08-15 13:36:59 +02:00
wordIndex . addEntries ( container [ m ] , System . currentTimeMillis ( ) , true ) ;
2007-07-05 01:48:52 +02:00
}
2007-08-15 13:36:59 +02:00
2007-07-05 01:48:52 +02:00
// read index abstract
if ( abstractCache ! = null ) {
2008-01-22 12:51:43 +01:00
Iterator < Map . Entry < String , String > > i = result . entrySet ( ) . iterator ( ) ;
Map . Entry < String , String > entry ;
TreeMap < String , String > singleAbstract ;
2007-07-05 01:48:52 +02:00
String wordhash ;
serverByteBuffer ci ;
while ( i . hasNext ( ) ) {
2008-01-22 12:51:43 +01:00
entry = i . next ( ) ;
if ( entry . getKey ( ) . startsWith ( " indexabstract. " ) ) {
wordhash = entry . getKey ( ) . substring ( 14 ) ;
2007-07-05 01:48:52 +02:00
synchronized ( abstractCache ) {
2008-01-17 19:43:01 +01:00
singleAbstract = ( TreeMap < String , String > ) abstractCache . get ( wordhash ) ; // a mapping from url-hashes to a string of peer-hashes
2008-01-22 12:51:43 +01:00
if ( singleAbstract = = null ) singleAbstract = new TreeMap < String , String > ( ) ;
ci = new serverByteBuffer ( entry . getValue ( ) . getBytes ( ) ) ;
2007-07-15 14:39:16 +02:00
//System.out.println("DEBUG-ABSTRACTFETCH: for word hash " + wordhash + " received " + ci.toString());
2007-11-17 02:53:02 +01:00
indexContainer . decompressIndex ( singleAbstract , ci , target . hash ) ;
2007-07-05 01:48:52 +02:00
abstractCache . put ( wordhash , singleAbstract ) ;
}
}
}
}
2007-10-19 17:31:38 +02:00
2007-07-05 01:48:52 +02:00
// generate statistics
long searchtime ;
try {
searchtime = Integer . parseInt ( ( String ) result . get ( " searchtime " ) ) ;
} catch ( NumberFormatException e ) {
searchtime = totalrequesttime ;
}
yacyCore . log . logFine ( " SEARCH "
+ results
+ " URLS FROM "
+ target . hash
+ " : "
+ target . getName ( )
+ " , score= "
+ target . selectscore
+ " , DHTdist= "
+ ( ( wordhashes . length ( ) < 12 ) ? " void " : Double
. toString ( yacyDHTAction . dhtDistance ( target . hash ,
2007-09-04 01:43:55 +02:00
wordhashes . substring ( 0 , 12 ) ) ) )
+ " , searchtime= " + searchtime + " , netdelay= "
2007-07-05 01:48:52 +02:00
+ ( totalrequesttime - searchtime ) + " , references= "
+ result . get ( " references " ) ) ;
return urls ;
}
2005-09-21 23:32:43 +02:00
2008-01-22 12:51:43 +01:00
public static HashMap < String , String > permissionMessage ( String targetHash ) {
2005-05-07 23:11:18 +02:00
// ask for allowed message size and attachement size
// if this replies null, the peer does not answer
2007-10-01 14:30:23 +02:00
if ( yacyCore . seedDB = = null | | yacyCore . seedDB . mySeed ( ) = = null ) { return null ; }
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , targetHash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " process " , " permission " ) ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-05-07 23:11:18 +02:00
try {
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + targetAddress ( targetHash ) + " /yacy/message.html " , null ) ,
2007-07-05 01:48:52 +02:00
yacySeed . b64Hash2hexHash ( targetHash ) + " .yacyh " ,
8000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
2005-10-22 15:28:04 +02:00
) ;
2007-07-05 01:48:52 +02:00
return result ;
2005-05-07 23:11:18 +02:00
} catch ( Exception e ) {
// most probably a network time-out exception
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.permissionMessage error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return null ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2008-01-22 12:51:43 +01:00
public static HashMap < String , String > postMessage ( String targetHash , String subject , byte [ ] message ) {
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , targetHash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " process " , " post " ) ;
post . put ( " myseed " , yacyCore . seedDB . mySeed ( ) . genSeedStr ( post . get ( " key " , " " ) ) ) ;
post . put ( " subject " , subject ) ;
2007-07-05 01:48:52 +02:00
try {
2007-10-24 23:38:19 +02:00
post . put ( " message " , new String ( message , " UTF-8 " ) ) ;
2007-07-05 01:48:52 +02:00
} catch ( UnsupportedEncodingException e ) {
2007-10-24 23:38:19 +02:00
post . put ( " message " , new String ( message ) ) ;
2007-07-05 01:48:52 +02:00
}
2007-08-08 20:23:45 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-11-11 00:48:20 +01:00
try {
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + targetAddress ( targetHash ) + " /yacy/message.html " , null ) ,
2007-07-05 01:48:52 +02:00
yacySeed . b64Hash2hexHash ( targetHash ) + " .yacyh " ,
20000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
) ;
return result ;
2005-11-11 00:48:20 +01:00
} catch ( Exception e ) {
yacyCore . log . logSevere ( " yacyClient.postMessage error: " + e . getMessage ( ) ) ;
return null ;
}
}
public static String targetAddress ( String targetHash ) {
// find target address
2005-05-07 23:11:18 +02:00
String address ;
2007-10-01 14:30:23 +02:00
if ( targetHash . equals ( yacyCore . seedDB . mySeed ( ) . hash ) ) {
address = yacyCore . seedDB . mySeed ( ) . getClusterAddress ( ) ;
2005-09-21 23:32:43 +02:00
} else {
2005-11-11 00:48:20 +01:00
final yacySeed targetSeed = yacyCore . seedDB . getConnected ( targetHash ) ;
if ( targetSeed = = null ) { return null ; }
2007-04-30 00:05:34 +02:00
address = targetSeed . getClusterAddress ( ) ;
2005-09-21 23:32:43 +02:00
}
2005-11-11 00:48:20 +01:00
if ( address = = null ) address = " localhost:8080 " ;
return address ;
}
2008-01-22 12:51:43 +01:00
public static HashMap < String , String > transferPermission ( String targetAddress , long filesize , String filename ) {
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , null ) ;
2007-10-24 23:38:19 +02:00
post . put ( " process " , " permission " ) ;
post . put ( " purpose " , " crcon " ) ;
post . put ( " filename " , filename ) ;
post . put ( " filesize " , Long . toString ( filesize ) ) ;
post . put ( " can-send-protocol " , " http " ) ;
2005-11-11 00:48:20 +01:00
// send request
try {
2007-09-05 11:01:35 +02:00
final yacyURL url = new yacyURL ( " http:// " + targetAddress + " /yacy/transfer.html " , null ) ;
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-07-05 01:48:52 +02:00
httpc . wput ( url ,
url . getHost ( ) ,
6000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
2005-11-11 00:48:20 +01:00
) ;
2007-07-05 01:48:52 +02:00
return result ;
2005-11-11 00:48:20 +01:00
} catch ( Exception e ) {
// most probably a network time-out exception
yacyCore . log . logSevere ( " yacyClient.permissionTransfer error: " + e . getMessage ( ) ) ;
return null ;
}
}
2008-01-22 12:51:43 +01:00
public static HashMap < String , String > transferStore ( String targetAddress , String access , String filename , byte [ ] file ) {
2005-11-11 00:48:20 +01:00
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , null ) ;
2007-10-24 23:38:19 +02:00
post . put ( " process " , " store " ) ;
post . put ( " purpose " , " crcon " ) ;
post . put ( " filename " , filename ) ;
2005-11-11 00:48:20 +01:00
post . put ( " filesize " , Long . toString ( file . length ) ) ;
2007-10-24 23:38:19 +02:00
post . put ( " md5 " , serverCodings . encodeMD5Hex ( file ) ) ;
post . put ( " access " , access ) ;
2008-01-22 12:51:43 +01:00
HashMap < String , byte [ ] > files = new HashMap < String , byte [ ] > ( ) ;
2005-11-11 00:48:20 +01:00
files . put ( " filename " , file ) ;
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-04-07 21:19:42 +02:00
try {
2007-09-05 11:01:35 +02:00
final yacyURL url = new yacyURL ( " http:// " + targetAddress + " /yacy/transfer.html " , null ) ;
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2007-07-05 01:48:52 +02:00
httpc . wput ( url ,
url . getHost ( ) ,
20000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
files
) , " UTF-8 "
) ;
return result ;
2005-04-07 21:19:42 +02:00
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.postMessage error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-11-11 00:48:20 +01:00
public static String transfer ( String targetAddress , String filename , byte [ ] file ) {
2008-01-22 12:51:43 +01:00
HashMap < String , String > phase1 = transferPermission ( targetAddress , file . length , filename ) ;
2005-11-11 00:48:20 +01:00
if ( phase1 = = null ) return " no connection to remote address " + targetAddress + " ; phase 1 " ;
String access = ( String ) phase1 . get ( " access " ) ;
String nextaddress = ( String ) phase1 . get ( " address " ) ;
String protocol = ( String ) phase1 . get ( " protocol " ) ;
2005-12-05 15:24:13 +01:00
//String path = (String) phase1.get("path");
//String maxsize = (String) phase1.get("maxsize");
2005-11-11 00:48:20 +01:00
String response = ( String ) phase1 . get ( " response " ) ;
if ( ( response = = null ) | | ( protocol = = null ) | | ( access = = null ) ) return " wrong return values from other peer; phase 1 " ;
if ( ! ( response . equals ( " ok " ) ) ) return " remote peer rejected transfer: " + response ;
2006-01-04 01:39:00 +01:00
String accesscode = serverCodings . encodeMD5Hex ( kelondroBase64Order . standardCoder . encodeString ( access ) ) ;
2005-11-11 00:48:20 +01:00
if ( protocol . equals ( " http " ) ) {
2008-01-22 12:51:43 +01:00
HashMap < String , String > phase2 = transferStore ( nextaddress , accesscode , filename , file ) ;
2005-11-11 00:48:20 +01:00
if ( phase2 = = null ) return " no connection to remote address " + targetAddress + " ; phase 2 " ;
response = ( String ) phase2 . get ( " response " ) ;
if ( response = = null ) return " wrong return values from other peer; phase 2 " ;
if ( ! ( response . equals ( " ok " ) ) ) {
return " remote peer failed with transfer: " + response ;
}
return null ;
}
return " wrong protocol: " + protocol ;
}
2005-09-21 23:32:43 +02:00
2008-01-17 19:43:01 +01:00
public static HashMap < String , String > crawlReceipt ( yacySeed target , String process , String result , String reason , indexURLEntry entry , String wordhashes ) {
2007-07-05 01:48:52 +02:00
assert ( target ! = null ) ;
2007-10-01 14:30:23 +02:00
assert ( yacyCore . seedDB . mySeed ( ) ! = null ) ;
assert ( yacyCore . seedDB . mySeed ( ) ! = target ) ;
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
/ *
the result can have one of the following values :
negative cases , no retry
unavailable - the resource is not avaiable ( a broken link ) ; not found or interrupted
robot - a robot - file has denied to crawl that resource
2005-05-07 23:11:18 +02:00
2005-04-07 21:19:42 +02:00
negative cases , retry possible
rejected - the peer has rejected to load the resource
dequeue - peer too busy - rejected to crawl
positive cases with crawling
fill - the resource was loaded and processed
update - the resource was already in database but re - loaded and processed
2005-05-07 23:11:18 +02:00
positive cases without crawling
2005-04-07 21:19:42 +02:00
known - the resource is already in database , believed to be fresh and not reloaded
stale - the resource was reloaded but not processed because source had no changes
2005-05-07 23:11:18 +02:00
* /
2005-10-22 15:28:04 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , target . hash ) ;
2007-10-24 23:38:19 +02:00
post . put ( " process " , process ) ;
post . put ( " urlhash " , ( ( entry = = null ) ? " " : entry . hash ( ) ) ) ;
post . put ( " result " , result ) ;
post . put ( " reason " , reason ) ;
post . put ( " wordh " , wordhashes ) ;
post . put ( " lurlEntry " , ( ( entry = = null ) ? " " : crypt . simpleEncode ( entry . toString ( ) , post . get ( " key " , " " ) ) ) ) ;
2007-07-05 01:48:52 +02:00
2005-10-22 15:28:04 +02:00
// determining target address
2007-07-05 01:48:52 +02:00
final String address = target . getClusterAddress ( ) ;
2005-09-21 23:32:43 +02:00
if ( address = = null ) { return null ; }
2007-07-05 01:48:52 +02:00
// send request
2005-04-07 21:19:42 +02:00
try {
2005-10-22 15:28:04 +02:00
return nxTools . table (
2007-09-05 11:01:35 +02:00
httpc . wput ( new yacyURL ( " http:// " + address + " /yacy/crawlReceipt.html " , null ) ,
2007-07-05 01:48:52 +02:00
target . getHexHash ( ) + " .yacyh " ,
60000 ,
null ,
null ,
proxyConfig ( ) ,
post ,
null
) , " UTF-8 "
) ;
2005-05-07 23:11:18 +02:00
} catch ( Exception e ) {
// most probably a network time-out exception
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.crawlReceipt error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return null ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2008-01-22 12:51:43 +01:00
public static HashMap < String , Object > transferIndex ( yacySeed targetSeed , indexContainer [ ] indexes , HashMap < String , indexURLEntry > urlCache , boolean gzipBody , int timeout ) {
2005-10-22 15:28:04 +02:00
2008-01-22 12:51:43 +01:00
HashMap < String , Object > resultObj = new HashMap < String , Object > ( ) ;
2006-06-14 11:40:42 +02:00
int payloadSize = 0 ;
try {
// check if we got all necessary urls in the urlCache (only for debugging)
2008-01-22 12:51:43 +01:00
Iterator < indexRWIRowEntry > eenum ;
2006-11-08 17:17:47 +01:00
indexRWIEntry entry ;
2006-06-14 11:40:42 +02:00
for ( int i = 0 ; i < indexes . length ; i + + ) {
eenum = indexes [ i ] . entries ( ) ;
while ( eenum . hasNext ( ) ) {
2006-11-08 17:17:47 +01:00
entry = ( indexRWIEntry ) eenum . next ( ) ;
2006-07-24 00:39:41 +02:00
if ( urlCache . get ( entry . urlHash ( ) ) = = null ) {
yacyCore . log . logFine ( " DEBUG transferIndex: to-send url hash ' " + entry . urlHash ( ) + " ' is not contained in urlCache " ) ;
2006-06-14 11:40:42 +02:00
}
2006-01-30 13:42:06 +01:00
}
2006-06-14 11:40:42 +02:00
}
// transfer the RWI without the URLs
2008-01-17 19:43:01 +01:00
HashMap < String , String > in = transferRWI ( targetSeed , indexes , gzipBody , timeout ) ;
2006-06-14 11:40:42 +02:00
resultObj . put ( " resultTransferRWI " , in ) ;
if ( in = = null ) {
resultObj . put ( " result " , " no_connection_1 " ) ;
return resultObj ;
}
2008-01-17 19:43:01 +01:00
if ( in . containsKey ( " indexPayloadSize " ) ) payloadSize + = Integer . parseInt ( in . get ( " indexPayloadSize " ) ) ;
2006-06-14 11:40:42 +02:00
String result = ( String ) in . get ( " result " ) ;
if ( result = = null ) {
2008-01-22 12:51:43 +01:00
resultObj . put ( " result " , " no_result_1 " ) ;
2006-06-14 11:40:42 +02:00
return resultObj ;
2006-01-30 13:42:06 +01:00
}
2006-06-14 11:40:42 +02:00
if ( ! ( result . equals ( " ok " ) ) ) {
targetSeed . setFlagAcceptRemoteIndex ( false ) ;
yacyCore . seedDB . update ( targetSeed . hash , targetSeed ) ;
2008-01-22 12:51:43 +01:00
resultObj . put ( " result " , result ) ;
2006-06-14 11:40:42 +02:00
return resultObj ;
2006-01-30 13:42:06 +01:00
}
2006-06-14 11:40:42 +02:00
// in now contains a list of unknown hashes
final String uhss = ( String ) in . get ( " unknownURL " ) ;
if ( uhss = = null ) {
resultObj . put ( " result " , " no_unknownURL_tag_in_response " ) ;
return resultObj ;
}
if ( uhss . length ( ) = = 0 ) { return resultObj ; } // all url's known, we are ready here
final String [ ] uhs = uhss . split ( " , " ) ;
if ( uhs . length = = 0 ) { return resultObj ; } // all url's known
// extract the urlCache from the result
2006-11-08 17:17:47 +01:00
indexURLEntry [ ] urls = new indexURLEntry [ uhs . length ] ;
2006-06-14 11:40:42 +02:00
for ( int i = 0 ; i < uhs . length ; i + + ) {
2006-11-08 17:17:47 +01:00
urls [ i ] = ( indexURLEntry ) urlCache . get ( uhs [ i ] ) ;
2006-06-14 11:40:42 +02:00
if ( urls [ i ] = = null ) {
yacyCore . log . logFine ( " DEBUG transferIndex: requested url hash ' " + uhs [ i ] + " ', unknownURL=' " + uhss + " ' " ) ;
}
}
in = transferURL ( targetSeed , urls , gzipBody , timeout ) ;
resultObj . put ( " resultTransferURL " , in ) ;
if ( in = = null ) {
resultObj . put ( " result " , " no_connection_2 " ) ;
return resultObj ;
}
2008-01-17 19:43:01 +01:00
if ( in . containsKey ( " urlPayloadSize " ) ) payloadSize + = Integer . parseInt ( in . get ( " urlPayloadSize " ) ) ;
2006-06-14 11:40:42 +02:00
result = ( String ) in . get ( " result " ) ;
if ( result = = null ) {
resultObj . put ( " result " , " no_result_2 " ) ;
return resultObj ;
}
if ( ! ( result . equals ( " ok " ) ) ) {
targetSeed . setFlagAcceptRemoteIndex ( false ) ;
yacyCore . seedDB . update ( targetSeed . hash , targetSeed ) ;
resultObj . put ( " result " , result ) ;
return resultObj ;
}
// int doubleentries = Integer.parseInt((String) in.get("double"));
// System.out.println("DEBUG tansferIndex: transferred " + uhs.length + " URL's, double=" + doubleentries);
return resultObj ;
} finally {
resultObj . put ( " payloadSize " , new Integer ( payloadSize ) ) ;
2005-04-07 21:19:42 +02:00
}
}
2005-09-21 23:32:43 +02:00
2008-01-22 12:51:43 +01:00
private static HashMap < String , String > transferRWI ( yacySeed targetSeed , indexContainer [ ] indexes , boolean gzipBody , int timeout ) {
2007-04-30 00:05:34 +02:00
final String address = targetSeed . getPublicAddress ( ) ;
2005-09-21 23:32:43 +02:00
if ( address = = null ) { return null ; }
2007-07-05 01:48:52 +02:00
2005-04-07 21:19:42 +02:00
// prepare post values
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , targetSeed . hash ) ;
2005-09-22 12:30:55 +02:00
// enabling gzip compression for post request body
2005-10-05 12:45:33 +02:00
if ( ( gzipBody ) & & ( targetSeed . getVersion ( ) > = yacyVersion . YACY_SUPPORTS_GZIP_POST_REQUESTS ) ) {
2007-10-24 23:38:19 +02:00
post . put ( httpc . GZIP_POST_BODY , " true " ) ;
2005-09-22 12:30:55 +02:00
}
2005-08-03 04:02:39 +02:00
post . put ( " wordc " , Integer . toString ( indexes . length ) ) ;
2005-10-22 15:28:04 +02:00
2005-04-07 21:19:42 +02:00
int indexcount = 0 ;
2005-09-21 23:32:43 +02:00
final StringBuffer entrypost = new StringBuffer ( indexes . length * 73 ) ;
2008-01-22 12:51:43 +01:00
Iterator < indexRWIRowEntry > eenum ;
2006-11-08 17:17:47 +01:00
indexRWIEntry entry ;
2005-04-07 21:19:42 +02:00
for ( int i = 0 ; i < indexes . length ; i + + ) {
2006-01-30 01:42:38 +01:00
eenum = indexes [ i ] . entries ( ) ;
2005-10-13 15:57:15 +02:00
while ( eenum . hasNext ( ) ) {
2006-11-08 17:17:47 +01:00
entry = ( indexRWIEntry ) eenum . next ( ) ;
2006-07-04 16:47:27 +02:00
entrypost . append ( indexes [ i ] . getWordHash ( ) )
2006-11-17 15:17:20 +01:00
. append ( entry . toPropertyForm ( ) )
2007-12-14 20:17:54 +01:00
. append ( serverCore . CRLF_STRING ) ;
2005-08-13 00:14:24 +02:00
indexcount + + ;
2005-04-07 21:19:42 +02:00
}
}
2005-09-21 23:32:43 +02:00
2005-08-13 00:14:24 +02:00
if ( indexcount = = 0 ) {
// nothing to do but everything ok
2008-01-22 12:51:43 +01:00
final HashMap < String , String > result = new HashMap < String , String > ( 2 ) ;
2005-08-13 00:14:24 +02:00
result . put ( " result " , " ok " ) ;
result . put ( " unknownURL " , " " ) ;
return result ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2007-07-23 02:50:57 +02:00
post . put ( " entryc " , indexcount ) ;
2008-01-17 19:43:01 +01:00
post . put ( " indexes " , entrypost . toString ( ) ) ;
2005-05-07 23:11:18 +02:00
try {
2008-01-17 19:43:01 +01:00
final ArrayList < String > v = nxTools . strings (
2006-12-01 12:01:56 +01:00
httpc . wput (
2007-09-05 11:01:35 +02:00
new yacyURL ( " http:// " + address + " /yacy/transferRWI.html " , null ) ,
2006-06-21 16:25:27 +02:00
targetSeed . getHexHash ( ) + " .yacyh " ,
2005-10-22 15:28:04 +02:00
timeout ,
null ,
null ,
2007-07-05 01:48:52 +02:00
proxyConfig ( ) ,
2005-11-11 00:48:20 +01:00
post ,
null
2006-12-01 12:01:56 +01:00
) , " UTF-8 " ) ;
2005-04-07 21:19:42 +02:00
// this should return a list of urlhashes that are unknwon
2006-12-01 12:01:56 +01:00
if ( ( v ! = null ) & & ( v . size ( ) > 0 ) ) {
2007-10-01 14:30:23 +02:00
yacyCore . seedDB . mySeed ( ) . incSI ( indexcount ) ;
2005-04-07 21:19:42 +02:00
}
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table ( v ) ;
2006-06-14 11:40:42 +02:00
// return the transfered index data in bytes (for debugging only)
2008-01-17 19:43:01 +01:00
result . put ( " indexPayloadSize " , Integer . toString ( entrypost . length ( ) ) ) ;
2005-04-07 21:19:42 +02:00
return result ;
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.transferRWI error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2008-01-17 19:43:01 +01:00
private static HashMap < String , String > transferURL ( yacySeed targetSeed , indexURLEntry [ ] urls , boolean gzipBody , int timeout ) {
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2007-04-30 00:05:34 +02:00
final String address = targetSeed . getPublicAddress ( ) ;
2005-09-21 23:32:43 +02:00
if ( address = = null ) { return null ; }
2007-07-05 01:48:52 +02:00
2005-04-07 21:19:42 +02:00
// prepare post values
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , targetSeed . hash ) ;
2005-09-22 12:30:55 +02:00
// enabling gzip compression for post request body
2005-10-05 12:45:33 +02:00
if ( ( gzipBody ) & & ( targetSeed . getVersion ( ) > = yacyVersion . YACY_SUPPORTS_GZIP_POST_REQUESTS ) ) {
2007-10-24 23:38:19 +02:00
post . put ( httpc . GZIP_POST_BODY , " true " ) ;
2005-09-22 12:30:55 +02:00
}
2005-05-07 23:11:18 +02:00
String resource = " " ;
2005-04-07 21:19:42 +02:00
int urlc = 0 ;
2006-06-14 11:40:42 +02:00
int urlPayloadSize = 0 ;
2005-04-07 21:19:42 +02:00
for ( int i = 0 ; i < urls . length ; i + + ) {
if ( urls [ i ] ! = null ) {
2006-06-14 11:40:42 +02:00
resource = urls [ i ] . toString ( ) ;
2005-04-07 21:19:42 +02:00
if ( resource ! = null ) {
2007-10-24 23:38:19 +02:00
post . put ( " url " + urlc , resource ) ;
2006-06-14 11:40:42 +02:00
urlPayloadSize + = resource . length ( ) ;
2005-04-07 21:19:42 +02:00
urlc + + ;
}
}
}
2007-07-23 02:50:57 +02:00
post . put ( " urlc " , urlc ) ;
2005-05-07 23:11:18 +02:00
try {
2008-01-17 19:43:01 +01:00
final ArrayList < String > v = nxTools . strings (
2006-12-01 12:01:56 +01:00
httpc . wput (
2007-09-05 11:01:35 +02:00
new yacyURL ( " http:// " + address + " /yacy/transferURL.html " , null ) ,
2006-06-21 16:25:27 +02:00
targetSeed . getHexHash ( ) + " .yacyh " ,
2005-10-22 15:28:04 +02:00
timeout ,
null ,
null ,
2007-07-05 01:48:52 +02:00
proxyConfig ( ) ,
2005-11-11 00:48:20 +01:00
post ,
null
2006-12-01 12:01:56 +01:00
) , " UTF-8 " ) ;
2005-10-22 15:28:04 +02:00
2006-12-01 12:01:56 +01:00
if ( ( v ! = null ) & & ( v . size ( ) > 0 ) ) {
2007-10-01 14:30:23 +02:00
yacyCore . seedDB . mySeed ( ) . incSU ( urlc ) ;
2005-04-07 21:19:42 +02:00
}
2006-06-14 11:40:42 +02:00
2008-01-17 19:43:01 +01:00
HashMap < String , String > result = nxTools . table ( v ) ;
2006-06-14 11:40:42 +02:00
// return the transfered url data in bytes (for debugging only)
2008-01-17 19:43:01 +01:00
result . put ( " urlPayloadSize " , Integer . toString ( urlPayloadSize ) ) ;
2006-06-14 11:40:42 +02:00
return result ;
2005-04-07 21:19:42 +02:00
} catch ( Exception e ) {
2006-06-21 16:25:27 +02:00
yacyCore . log . logSevere ( " yacyClient.transferURL error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2008-01-17 19:43:01 +01:00
public static HashMap < String , String > getProfile ( yacySeed targetSeed ) {
2007-07-05 01:48:52 +02:00
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2007-09-11 17:27:01 +02:00
final serverObjects post = yacyNetwork . basicRequestPost ( plasmaSwitchboard . getSwitchboard ( ) , targetSeed . hash ) ;
2007-07-05 01:48:52 +02:00
2007-04-30 00:05:34 +02:00
String address = targetSeed . getClusterAddress ( ) ;
2005-09-21 23:32:43 +02:00
if ( address = = null ) { address = " localhost:8080 " ; }
2005-05-07 23:11:18 +02:00
try {
2006-12-01 12:01:56 +01:00
return nxTools . table (
httpc . wput (
2007-09-05 11:01:35 +02:00
new yacyURL ( " http:// " + address + " /yacy/profile.html " , null ) ,
2006-06-21 16:25:27 +02:00
targetSeed . getHexHash ( ) + " .yacyh " ,
2006-05-10 11:37:18 +02:00
10000 ,
2005-10-22 15:28:04 +02:00
null ,
null ,
2007-07-05 01:48:52 +02:00
proxyConfig ( ) ,
2005-11-11 00:48:20 +01:00
post ,
null
2006-12-01 12:01:56 +01:00
) , " UTF-8 " ) ;
2005-04-07 21:19:42 +02:00
} catch ( Exception e ) {
2005-08-30 23:32:59 +02:00
yacyCore . log . logSevere ( " yacyClient.getProfile error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2007-07-05 01:48:52 +02:00
private static final httpRemoteProxyConfig proxyConfig ( ) {
httpRemoteProxyConfig p = plasmaSwitchboard . getSwitchboard ( ) . remoteProxyConfig ;
return ( ( p ! = null ) & & ( p . useProxy ( ) ) & & ( p . useProxy4Yacy ( ) ) ) ? p : null ;
}
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
public static void main ( String [ ] args ) {
System . out . println ( " yacyClient Test " ) ;
try {
2007-06-16 16:11:52 +02:00
final plasmaSwitchboard sb = new plasmaSwitchboard ( args [ 0 ] , " httpProxy.init " , " DATA/SETTINGS/httpProxy.conf " , false ) ;
2005-12-05 15:24:13 +01:00
/*final yacyCore core =*/ new yacyCore ( sb ) ;
2005-10-18 09:45:27 +02:00
yacyCore . peerActions . loadSeedLists ( ) ;
final yacySeed target = yacyCore . seedDB . getConnected ( args [ 1 ] ) ;
2006-11-23 03:16:30 +01:00
final String wordhashe = plasmaCondenser . word2hash ( " test " ) ;
2005-05-07 23:11:18 +02:00
//System.out.println("permission=" + permissionMessage(args[1]));
2005-10-22 15:28:04 +02:00
// should we use the proxy?
2007-10-01 14:30:23 +02:00
boolean useProxy = ( sb . remoteProxyConfig ! = null ) & &
( sb . remoteProxyConfig . useProxy ( ) ) & &
( sb . remoteProxyConfig . useProxy4Yacy ( ) ) ;
2005-10-22 15:28:04 +02:00
2008-01-17 19:43:01 +01:00
final HashMap < String , String > result = nxTools . table (
2005-10-22 15:28:04 +02:00
httpc . wget (
2007-09-05 11:01:35 +02:00
new yacyURL ( " http:// " + target . getPublicAddress ( ) + " /yacy/search.html " +
2007-10-01 14:30:23 +02:00
" ?myseed= " + yacyCore . seedDB . mySeed ( ) . genSeedStr ( null ) +
2005-10-22 15:28:04 +02:00
" &youare= " + target . hash + " &key= " +
2007-10-01 14:30:23 +02:00
" &myseed= " + yacyCore . seedDB . mySeed ( ) . genSeedStr ( null ) +
2005-10-22 15:28:04 +02:00
" &count=10 " +
" &resource=global " +
2007-06-26 16:37:10 +02:00
" &query= " + wordhashe +
2007-09-05 11:01:35 +02:00
" &network.unit.name= " + plasmaSwitchboard . getSwitchboard ( ) . getConfig ( " network.unit.name " , yacySeed . DFLT_NETWORK_UNIT ) , null ) ,
2006-06-21 16:25:27 +02:00
target . getHexHash ( ) + " .yacyh " ,
2005-10-22 15:28:04 +02:00
5000 ,
null ,
null ,
2007-10-01 14:30:23 +02:00
( useProxy ) ? sb . remoteProxyConfig : null ,
2007-07-04 00:55:47 +02:00
null ,
null
2005-10-22 15:28:04 +02:00
)
2006-10-02 15:35:38 +02:00
, " UTF-8 " ) ;
2005-04-07 21:19:42 +02:00
System . out . println ( " Result= " + result . toString ( ) ) ;
} catch ( Exception e ) {
2005-05-07 23:11:18 +02:00
e . printStackTrace ( ) ;
}
2005-04-07 21:19:42 +02:00
System . exit ( 0 ) ;
}
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
}