2005-05-07 23:11:18 +02:00
// yacyClient.java
2005-04-07 21:19:42 +02:00
// -------------------------------------
2008-07-20 19:14:51 +02:00
// (C) by Michael Peter Christen; mc@yacy.net
2005-04-07 21:19:42 +02:00
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
2005-09-21 23:32:43 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
2011-09-25 18:59:06 +02:00
package net.yacy.peers ;
2005-04-07 21:19:42 +02:00
2011-05-18 16:26:28 +02:00
import java.io.ByteArrayInputStream ;
2011-02-28 07:28:29 +01:00
import java.io.ByteArrayOutputStream ;
2008-02-01 00:40:47 +01:00
import java.io.File ;
2005-05-05 07:32:19 +02:00
import java.io.IOException ;
2011-05-18 16:26:28 +02:00
import java.io.InputStreamReader ;
2012-07-24 17:29:32 +02:00
import java.net.InetAddress ;
2008-05-04 12:53:04 +02:00
import java.net.MalformedURLException ;
2010-09-14 15:35:47 +02:00
import java.util.ArrayList ;
2005-05-05 07:32:19 +02:00
import java.util.HashMap ;
2005-10-13 15:57:15 +02:00
import java.util.Iterator ;
2010-07-13 01:07:05 +02:00
import java.util.LinkedHashMap ;
2010-09-14 15:35:47 +02:00
import java.util.List ;
2006-09-30 00:27:20 +02:00
import java.util.Map ;
2010-11-28 03:57:31 +01:00
import java.util.SortedMap ;
2006-09-30 00:27:20 +02:00
import java.util.TreeMap ;
2010-03-23 11:17:28 +01:00
import java.util.regex.Pattern ;
2006-01-30 01:42:38 +01:00
2011-05-18 16:26:28 +02:00
import net.yacy.migration ;
2011-10-04 11:06:24 +02:00
import net.yacy.cora.date.GenericFormatter ;
2011-05-27 10:24:54 +02:00
import net.yacy.cora.document.ASCII ;
2012-04-22 00:04:36 +02:00
import net.yacy.cora.document.Classification ;
2011-05-18 16:26:28 +02:00
import net.yacy.cora.document.JSONArray ;
import net.yacy.cora.document.JSONException ;
import net.yacy.cora.document.JSONObject ;
import net.yacy.cora.document.JSONTokener ;
2010-08-23 03:08:56 +02:00
import net.yacy.cora.document.MultiProtocolURI ;
2010-05-25 14:54:57 +02:00
import net.yacy.cora.document.RSSFeed ;
2010-06-27 00:39:27 +02:00
import net.yacy.cora.document.RSSMessage ;
2010-05-25 14:54:57 +02:00
import net.yacy.cora.document.RSSReader ;
2011-02-25 14:26:09 +01:00
import net.yacy.cora.document.UTF8 ;
2011-04-26 15:35:29 +02:00
import net.yacy.cora.protocol.ClientIdentification ;
2011-04-29 12:58:12 +02:00
import net.yacy.cora.protocol.Domains ;
2011-04-26 13:46:31 +02:00
import net.yacy.cora.protocol.http.HTTPClient ;
2011-04-14 22:05:04 +02:00
import net.yacy.cora.services.federated.opensearch.SRURSSConnector ;
2011-06-13 23:44:03 +02:00
import net.yacy.cora.services.federated.yacy.CacheStrategy ;
2012-07-22 13:18:45 +02:00
import net.yacy.kelondro.data.meta.URIMetadata ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.meta.URIMetadataRow ;
import net.yacy.kelondro.data.word.Word ;
import net.yacy.kelondro.data.word.WordReference ;
2011-05-16 00:57:31 +02:00
import net.yacy.kelondro.data.word.WordReferenceFactory ;
2009-12-10 00:27:26 +01:00
import net.yacy.kelondro.index.RowSpaceExceededException ;
2009-11-05 21:28:37 +01:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Base64Order ;
import net.yacy.kelondro.order.Bitfield ;
2011-10-04 11:06:24 +02:00
import net.yacy.kelondro.order.Digest ;
2009-10-10 02:39:15 +02:00
import net.yacy.kelondro.rwi.Reference ;
import net.yacy.kelondro.rwi.ReferenceContainer ;
import net.yacy.kelondro.rwi.ReferenceContainerCache ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.ByteBuffer ;
import net.yacy.kelondro.util.FileUtils ;
2011-09-25 18:59:06 +02:00
import net.yacy.peers.graphics.ProfilingGraph ;
import net.yacy.peers.graphics.WebStructureGraph ;
import net.yacy.peers.graphics.WebStructureGraph.HostReference ;
import net.yacy.peers.operation.yacyVersion ;
2009-10-21 22:14:30 +02:00
import net.yacy.repository.Blacklist ;
2012-06-11 00:17:30 +02:00
import net.yacy.repository.Blacklist.BlacklistType ;
2012-05-04 17:28:27 +02:00
import net.yacy.search.EventTracker ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
2011-10-04 11:06:24 +02:00
import net.yacy.search.SwitchboardConstants ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.index.Segment ;
import net.yacy.search.query.QueryParams ;
2011-09-26 23:42:28 +02:00
import net.yacy.search.query.RWIProcess ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.query.SearchEvent ;
import net.yacy.search.ranking.RankingProfile ;
import net.yacy.search.snippet.TextSnippet ;
2009-10-10 01:22:22 +02:00
2010-07-13 01:07:05 +02:00
import org.apache.http.entity.mime.content.ContentBody ;
2008-04-12 10:12:51 +02:00
2008-05-06 02:32:41 +02:00
import de.anomic.crawler.ResultURLs ;
2011-02-12 01:01:40 +01:00
import de.anomic.crawler.ResultURLs.EventOrigin ;
2005-05-05 07:32:19 +02:00
import de.anomic.server.serverCore ;
2011-10-04 11:06:24 +02:00
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
2005-05-05 07:32:19 +02:00
import de.anomic.tools.crypt ;
2005-04-07 21:19:42 +02:00
2011-12-06 02:24:51 +01:00
public final class Protocol
{
private static byte [ ] postToFile (
final Seed target ,
final String filename ,
final Map < String , ContentBody > parts ,
final int timeout ) throws IOException {
2011-05-18 16:26:28 +02:00
return postToFile ( target . getClusterAddress ( ) , target . hash , filename , parts , timeout ) ;
2010-05-25 14:54:57 +02:00
}
2011-12-06 02:24:51 +01:00
private static byte [ ] postToFile (
final SeedDB seedDB ,
final String targetHash ,
final String filename ,
final Map < String , ContentBody > parts ,
final int timeout ) throws IOException {
return postToFile ( seedDB . targetAddress ( targetHash ) , targetHash , filename , parts , timeout ) ;
2011-05-18 16:26:28 +02:00
}
2011-12-06 02:24:51 +01:00
private static byte [ ] postToFile (
final String targetAddress ,
final String targetPeerHash ,
final String filename ,
final Map < String , ContentBody > parts ,
final int timeout ) throws IOException {
2011-05-18 16:26:28 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , timeout ) ;
2011-12-06 02:24:51 +01:00
return httpClient . POSTbytes (
new MultiProtocolURI ( " http:// " + targetAddress + " /yacy/ " + filename ) ,
Seed . b64Hash2hexHash ( targetPeerHash ) + " .yacyh " ,
parts ,
false ) ;
2010-05-25 14:54:57 +02:00
}
2011-06-13 23:44:03 +02:00
2008-08-02 15:57:00 +02:00
/ * *
2011-12-06 02:24:51 +01:00
* this is called to enrich the seed information by - own address ( if peer is behind a nat / router ) - check
* peer type ( virgin / junior / senior / principal ) to do this , we send a ' Hello ' to another peer this carries
* the following information : ' iam ' - own hash ' youare ' - remote hash , to verify that we are correct ' key '
* - a session key that the remote peer may use to answer and the own seed string we expect the following
* information to be send back : - ' yourip ' the ip of the connection peer ( we ) - ' yourtype ' the type of
* this peer that the other peer checked by asking for a specific word and the remote seed string one
* exceptional failure case is when we know the other ' s peers hash , the other peers responds correctly but
* they appear to be another peer by comparisment of the other peer ' s hash this works of course only if we
* know the other peer ' s hash .
2012-02-25 14:07:02 +01:00
*
2011-06-13 23:44:03 +02:00
* @return the number of new seeds
2008-08-02 15:57:00 +02:00
* /
2011-12-06 02:24:51 +01:00
public static int hello (
final Seed mySeed ,
final PeerActions peerActions ,
final String address ,
final String otherHash ,
final String otherName ) {
2011-06-13 23:44:03 +02:00
2010-06-01 15:02:11 +02:00
Map < String , String > result = null ;
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2012-05-25 15:33:02 +02:00
long responseTime = Long . MAX_VALUE ;
2010-06-16 10:30:13 +02:00
try {
2007-07-05 01:48:52 +02:00
// generate request
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , null , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " count " , UTF8 . StringBody ( " 20 " ) ) ;
2011-10-04 11:06:24 +02:00
parts . put ( " magic " , UTF8 . StringBody ( Long . toString ( Network . magic ) ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " seed " , UTF8 . StringBody ( mySeed . genSeedStr ( salt ) ) ) ;
2007-07-05 01:48:52 +02:00
// send request
2008-08-02 14:12:04 +02:00
final long start = System . currentTimeMillis ( ) ;
2011-04-26 13:46:31 +02:00
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
2011-04-26 15:35:29 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , 30000 ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] content =
httpClient . POSTbytes (
new MultiProtocolURI ( " http:// " + address + " /yacy/hello.html " ) ,
Seed . b64Hash2hexHash ( otherHash ) + " .yacyh " ,
parts ,
false ) ;
2012-05-25 15:33:02 +02:00
responseTime = System . currentTimeMillis ( ) - start ;
2011-12-06 02:24:51 +01:00
Network . log . logInfo ( " yacyClient.hello thread ' "
+ Thread . currentThread ( ) . getName ( )
+ " ' contacted peer at "
+ address
+ " , received "
+ ( ( content = = null ) ? " null " : content . length )
+ " bytes, time = "
2012-05-25 15:33:02 +02:00
+ responseTime
2011-12-06 02:24:51 +01:00
+ " milliseconds " ) ;
2009-10-05 22:11:41 +02:00
result = FileUtils . table ( content ) ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
if ( Thread . currentThread ( ) . isInterrupted ( ) ) {
Network . log . logInfo ( " yacyClient.hello thread ' "
+ Thread . currentThread ( ) . getName ( )
+ " ' interrupted. " ) ;
2007-09-12 02:42:53 +02:00
return - 1 ;
2005-07-07 15:58:54 +02:00
}
2011-12-06 02:24:51 +01:00
Network . log . logInfo ( " yacyClient.hello thread ' "
+ Thread . currentThread ( ) . getName ( )
+ " ', peer "
+ address
+ " ; exception: "
+ e . getMessage ( ) ) ;
2008-08-02 15:57:00 +02:00
// try again (go into loop)
2007-09-12 02:42:53 +02:00
result = null ;
2005-05-07 23:11:18 +02:00
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( result = = null ) {
Network . log . logInfo ( " yacyClient.hello result error: "
+ ( ( result = = null ) ? " result null " : ( " result= " + result . toString ( ) ) ) ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-09-21 23:32:43 +02:00
2005-05-07 23:11:18 +02:00
// check consistency with expectation
2011-10-04 11:06:24 +02:00
Seed otherPeer = null ;
2007-06-13 15:21:19 +02:00
String seed ;
2011-12-06 02:24:51 +01:00
if ( ( otherHash ! = null ) & & ( otherHash . length ( ) > 0 ) & & ( ( seed = result . get ( " seed0 " ) ) ! = null ) ) {
if ( seed . length ( ) > Seed . maxsize ) {
Network . log . logInfo ( " hello/client 0: rejected contacting seed; too large ( "
+ seed . length ( )
+ " > "
+ Seed . maxsize
+ " ) " ) ;
2007-06-13 00:08:33 +02:00
} else {
2011-12-06 02:24:51 +01:00
try {
final int p = address . indexOf ( ':' ) ;
if ( p < 0 ) {
return - 1 ;
}
2012-07-24 17:29:32 +02:00
String h = address . substring ( 0 , p ) ;
InetAddress ie = Domains . dnsResolve ( h ) ;
final String host = ie = = null ? h : ie . getHostAddress ( ) ; // hack to prevent NPEs
2012-07-05 10:23:07 +02:00
otherPeer = Seed . genRemoteSeed ( seed , false , host ) ;
2011-12-06 02:24:51 +01:00
if ( ! otherPeer . hash . equals ( otherHash ) ) {
Network . log . logInfo ( " yacyClient.hello: consistency error: otherPeer.hash = "
+ otherPeer . hash
+ " , otherHash = "
+ otherHash ) ;
2010-10-26 17:00:22 +02:00
return - 1 ; // no success
}
2011-12-06 02:24:51 +01:00
} catch ( final IOException e ) {
Network . log . logInfo ( " yacyClient.hello: consistency error: other seed bad: "
+ e . getMessage ( )
+ " , seed= "
+ seed ) ;
2010-10-26 17:00:22 +02:00
return - 1 ; // no success
}
2005-05-07 23:11:18 +02:00
}
}
2005-09-21 23:32:43 +02:00
2012-05-25 15:33:02 +02:00
// get access type response
String mytype = result . get ( Seed . YOURTYPE ) ;
if ( mytype = = null ) {
mytype = " " ;
}
2012-05-30 16:26:05 +02:00
2005-05-07 23:11:18 +02:00
// set my own seed according to new information
2008-05-06 01:13:47 +02:00
// we overwrite our own IP number only
2011-12-06 02:24:51 +01:00
if ( serverCore . useStaticIP ) {
2009-10-11 02:12:19 +02:00
mySeed . setIP ( Switchboard . getSwitchboard ( ) . myPublicIP ( ) ) ;
2005-09-21 23:32:43 +02:00
} else {
2008-08-02 14:12:04 +02:00
final String myIP = result . get ( " yourip " ) ;
2011-10-04 11:06:24 +02:00
final String properIP = Seed . isProperIP ( myIP ) ;
2012-07-23 23:40:50 +02:00
mySeed . setFlagRootNode ( ( mytype . equals ( Seed . PEERTYPE_SENIOR ) | | mytype . equals ( Seed . PEERTYPE_PRINCIPAL ) ) & & responseTime < 1000 & & Domains . isThisHostIP ( myIP ) ) ;
2011-12-06 02:24:51 +01:00
if ( properIP = = null ) {
mySeed . setIP ( myIP ) ;
}
2005-06-16 09:28:07 +02:00
}
2005-09-21 23:32:43 +02:00
2008-05-06 01:13:47 +02:00
// change our seed-type
2011-10-04 11:06:24 +02:00
final Accessible accessible = new Accessible ( ) ;
2011-12-06 02:24:51 +01:00
if ( mytype . equals ( Seed . PEERTYPE_SENIOR ) | | mytype . equals ( Seed . PEERTYPE_PRINCIPAL ) ) {
2008-05-06 01:13:47 +02:00
accessible . IWasAccessed = true ;
2011-12-06 02:24:51 +01:00
if ( mySeed . isPrincipal ( ) ) {
2011-10-04 11:06:24 +02:00
mytype = Seed . PEERTYPE_PRINCIPAL ;
2005-07-04 13:09:48 +02:00
}
2008-05-06 01:13:47 +02:00
} else {
accessible . IWasAccessed = false ;
}
accessible . lastUpdated = System . currentTimeMillis ( ) ;
2011-10-04 11:06:24 +02:00
Network . amIAccessibleDB . put ( otherHash , accessible ) ;
2005-09-21 23:32:43 +02:00
2011-06-13 23:44:03 +02:00
/ *
2008-05-06 01:13:47 +02:00
* If we were reported as junior we have to check if your port forwarding channel is broken
* If this is true we try to reconnect the sch channel to the remote server now .
* /
2011-12-06 02:24:51 +01:00
if ( mytype . equalsIgnoreCase ( Seed . PEERTYPE_JUNIOR ) ) {
Network . log . logInfo ( " yacyClient.hello: Peer ' "
+ ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) )
+ " ' reported us as junior. " ) ;
} else if ( ( mytype . equalsIgnoreCase ( Seed . PEERTYPE_SENIOR ) )
| | ( mytype . equalsIgnoreCase ( Seed . PEERTYPE_PRINCIPAL ) ) ) {
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " yacyClient.hello: Peer ' "
+ ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) )
+ " ' reported us as "
+ mytype
+ " , accepted other peer. " ) ;
}
2008-05-06 01:13:47 +02:00
} else {
// wrong type report
2011-12-06 02:24:51 +01:00
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " yacyClient.hello: Peer ' "
+ ( ( otherPeer = = null ) ? " unknown " : otherPeer . getName ( ) )
+ " ' reported us as "
+ mytype
+ " , rejecting other peer. " ) ;
}
2008-05-06 01:13:47 +02:00
return - 1 ;
2005-06-16 09:28:07 +02:00
}
2011-12-06 02:24:51 +01:00
if ( mySeed . orVirgin ( ) . equals ( Seed . PEERTYPE_VIRGIN ) ) {
2011-10-04 11:06:24 +02:00
mySeed . put ( Seed . PEERTYPE , mytype ) ;
2011-12-06 02:24:51 +01:00
}
2005-09-21 23:32:43 +02:00
2008-06-05 00:24:00 +02:00
final String error = mySeed . isProper ( true ) ;
2011-12-06 02:24:51 +01:00
if ( error ! = null ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.hello mySeed error - not proper: " + error ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-09-21 23:32:43 +02:00
2005-10-17 17:46:12 +02:00
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
2007-10-19 17:31:38 +02:00
2005-05-07 23:11:18 +02:00
// read the seeds that the peer returned and integrate them into own database
2005-04-07 21:19:42 +02:00
int i = 0 ;
2005-05-07 23:11:18 +02:00
int count = 0 ;
2005-09-21 23:32:43 +02:00
String seedStr ;
2011-10-04 11:06:24 +02:00
Seed s ;
2011-07-14 09:07:06 +02:00
final int connectedBefore = peerActions . sizeConnected ( ) ;
2011-12-06 02:24:51 +01:00
while ( ( seedStr = result . get ( " seed " + i + + ) ) ! = null ) {
2005-05-07 23:11:18 +02:00
// integrate new seed into own database
// the first seed, "seed0" is the seed of the responding peer
2011-12-06 02:24:51 +01:00
if ( seedStr . length ( ) > Seed . maxsize ) {
Network . log . logInfo ( " hello/client: rejected contacting seed; too large ( "
+ seedStr . length ( )
+ " > "
+ Seed . maxsize
+ " ) " ) ;
2007-06-13 00:08:33 +02:00
} else {
2010-10-26 17:00:22 +02:00
try {
2011-12-06 02:24:51 +01:00
if ( i = = 1 ) {
2011-06-13 23:44:03 +02:00
final int p = address . indexOf ( ':' ) ;
2011-12-06 02:24:51 +01:00
if ( p < 0 ) {
return - 1 ;
}
2011-06-13 23:44:03 +02:00
final String host = Domains . dnsResolve ( address . substring ( 0 , p ) ) . getHostAddress ( ) ;
2012-07-05 10:23:07 +02:00
s = Seed . genRemoteSeed ( seedStr , false , host ) ;
2011-04-29 12:58:12 +02:00
} else {
2012-07-05 10:23:07 +02:00
s = Seed . genRemoteSeed ( seedStr , false , null ) ;
2011-04-29 12:58:12 +02:00
}
2011-12-06 02:24:51 +01:00
if ( peerActions . peerArrival ( s , ( i = = 1 ) ) ) {
count + + ;
}
} catch ( final IOException e ) {
Network . log . logInfo ( " hello/client: rejected contacting seed; bad ( "
+ e . getMessage ( )
+ " ) " ) ;
2010-10-26 17:00:22 +02:00
}
2007-06-13 00:08:33 +02:00
}
2005-05-07 23:11:18 +02:00
}
2011-07-14 09:07:06 +02:00
final int connectedAfter = peerActions . sizeConnected ( ) ;
// update event tracker
2011-12-06 02:24:51 +01:00
EventTracker . update ( EventTracker . EClass . PEERPING , new ProfilingGraph . EventPing (
mySeed . getName ( ) ,
otherName ,
true ,
connectedAfter - connectedBefore ) , false ) ;
2011-07-14 09:07:06 +02:00
2005-05-07 23:11:18 +02:00
return count ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2011-10-04 11:06:24 +02:00
public static Seed querySeed ( final Seed target , final String seedHash ) {
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// send request
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " object " , UTF8 . StringBody ( " seed " ) ) ;
parts . put ( " env " , UTF8 . StringBody ( seedHash ) ) ;
2010-07-15 02:59:53 +02:00
final byte [ ] content = postToFile ( target , " query.html " , parts , 10000 ) ;
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( content ) ;
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( result = = null | | result . isEmpty ( ) ) {
return null ;
}
2005-10-17 17:46:12 +02:00
//final Date remoteTime = yacyCore.parseUniversalDate((String) result.get(yacySeed.MYTIME)); // read remote time
2012-07-05 10:23:07 +02:00
return Seed . genRemoteSeed ( result . get ( " response " ) , false , target . getIP ( ) ) ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.querySeed error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2011-10-04 11:06:24 +02:00
public static int queryRWICount ( final Seed target , final String wordHash ) {
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// send request
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " object " , UTF8 . StringBody ( " rwicount " ) ) ;
parts . put ( " ttl " , UTF8 . StringBody ( " 0 " ) ) ;
parts . put ( " env " , UTF8 . StringBody ( wordHash ) ) ;
2010-07-15 02:59:53 +02:00
final byte [ ] content = postToFile ( target , " query.html " , parts , 5000 ) ;
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( content ) ;
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( result = = null | | result . isEmpty ( ) ) {
return - 1 ;
}
2008-06-06 18:01:27 +02:00
return Integer . parseInt ( result . get ( " response " ) ) ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.queryRWICount error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return - 1 ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2011-04-29 03:52:20 +02:00
/ * *
* check the status of a remote peer
2012-02-25 14:07:02 +01:00
*
2011-04-29 03:52:20 +02:00
* @param target
* @return an array of two long : [ 0 ] is the count of urls , [ 1 ] is a magic
* /
2011-10-04 11:06:24 +02:00
public static long [ ] queryUrlCount ( final Seed target ) {
2011-12-06 02:24:51 +01:00
if ( target = = null ) {
return new long [ ] {
- 1 , - 1
} ;
}
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-05-07 23:11:18 +02:00
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " object " , UTF8 . StringBody ( " lurlcount " ) ) ;
parts . put ( " ttl " , UTF8 . StringBody ( " 0 " ) ) ;
parts . put ( " env " , UTF8 . StringBody ( " " ) ) ;
2010-07-15 02:59:53 +02:00
final byte [ ] content = postToFile ( target , " query.html " , parts , 5000 ) ;
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( content ) ;
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( result = = null | | result . isEmpty ( ) ) {
return new long [ ] {
- 1 , - 1
} ;
}
2008-06-06 18:01:27 +02:00
final String resp = result . get ( " response " ) ;
2011-12-06 02:24:51 +01:00
if ( resp = = null ) {
return new long [ ] {
- 1 , - 1
} ;
}
String magic = result . get ( " magic " ) ;
if ( magic = = null ) {
magic = " 0 " ;
}
2008-08-02 15:57:00 +02:00
try {
2011-12-06 02:24:51 +01:00
return new long [ ] {
Long . parseLong ( resp ) , Long . parseLong ( magic )
} ;
} catch ( final NumberFormatException e ) {
return new long [ ] {
- 1 , - 1
} ;
2006-09-07 03:13:03 +02:00
}
2011-12-06 02:24:51 +01:00
} catch ( final IOException e ) {
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " yacyClient.queryUrlCount error asking peer ' "
+ target . getName ( )
+ " ': "
+ e . toString ( ) ) ;
}
return new long [ ] {
- 1 , - 1
} ;
2005-05-07 23:11:18 +02:00
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2011-12-06 02:24:51 +01:00
public static RSSFeed queryRemoteCrawlURLs (
final SeedDB seedDB ,
final Seed target ,
final int maxCount ,
final long maxTime ) {
2011-06-13 23:44:03 +02:00
// returns a list of
2011-12-06 02:24:51 +01:00
if ( target = = null ) {
return null ;
}
2011-10-04 11:06:24 +02:00
final int targetCount = Integer . parseInt ( target . get ( Seed . RCOUNT , " 0 " ) ) ;
2011-12-06 02:24:51 +01:00
if ( targetCount < = 0 ) {
Network . log . logWarning ( " yacyClient.queryRemoteCrawlURLs wrong peer ' "
+ target . getName ( )
+ " ' selected: not enough links available " ) ;
2010-09-16 11:34:17 +02:00
return null ;
}
2007-11-29 03:07:37 +01:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-11-29 03:07:37 +01:00
// send request
try {
2008-04-09 13:02:14 +02:00
/* a long time-out is needed */
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " call " , UTF8 . StringBody ( " remotecrawl " ) ) ;
parts . put ( " count " , UTF8 . StringBody ( Integer . toString ( maxCount ) ) ) ;
parts . put ( " time " , UTF8 . StringBody ( Long . toString ( maxTime ) ) ) ;
2011-06-13 23:44:03 +02:00
// final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
2011-04-26 15:35:29 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , ( int ) maxTime ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] result =
httpClient . POSTbytes ( new MultiProtocolURI ( " http:// "
+ target . getClusterAddress ( )
+ " /yacy/urls.xml " ) , target . getHexHash ( ) + " .yacyh " , parts , false ) ;
2010-08-23 13:41:12 +02:00
final RSSReader reader = RSSReader . parse ( RSSFeed . DEFAULT_MAXSIZE , result ) ;
2011-12-06 02:24:51 +01:00
if ( reader = = null ) {
Network . log . logWarning ( " yacyClient.queryRemoteCrawlURLs failed asking peer ' "
+ target . getName ( )
+ " ': probably bad response from remote peer (1), reader == null " ) ;
2011-10-04 11:06:24 +02:00
target . put ( Seed . RCOUNT , " 0 " ) ;
2008-05-22 01:07:37 +02:00
seedDB . update ( target . hash , target ) ; // overwrite number of remote-available number to avoid that this peer is called again (until update is done by peer ping)
2009-11-05 21:28:37 +01:00
//Log.logException(e);
2008-05-22 01:07:37 +02:00
return null ;
}
2008-08-02 14:12:04 +02:00
final RSSFeed feed = reader . getFeed ( ) ;
2011-12-06 02:24:51 +01:00
if ( feed = = null ) {
2007-11-29 03:07:37 +01:00
// case where the rss reader does not understand the content
2011-12-06 02:24:51 +01:00
Network . log . logWarning ( " yacyClient.queryRemoteCrawlURLs failed asking peer ' "
+ target . getName ( )
+ " ': probably bad response from remote peer (2) " ) ;
2011-03-07 21:36:40 +01:00
//System.out.println("***DEBUG*** rss input = " + UTF8.String(result));
2011-10-04 11:06:24 +02:00
target . put ( Seed . RCOUNT , " 0 " ) ;
2008-05-06 01:13:47 +02:00
seedDB . update ( target . hash , target ) ; // overwrite number of remote-available number to avoid that this peer is called again (until update is done by peer ping)
2009-11-05 21:28:37 +01:00
//Log.logException(e);
2007-11-29 03:07:37 +01:00
return null ;
}
2010-09-16 11:34:17 +02:00
// update number of remotely available links in seed
2011-10-04 11:06:24 +02:00
target . put ( Seed . RCOUNT , Integer . toString ( Math . max ( 0 , targetCount - feed . size ( ) ) ) ) ;
2010-09-16 11:34:17 +02:00
seedDB . update ( target . hash , target ) ;
2008-04-24 23:31:07 +02:00
return feed ;
2011-12-06 02:24:51 +01:00
} catch ( final IOException e ) {
Network . log . logWarning ( " yacyClient.queryRemoteCrawlURLs error asking peer ' "
+ target . getName ( )
+ " ': "
+ e . toString ( ) ) ;
2007-11-29 03:07:37 +01:00
return null ;
}
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
public static RSSFeed search (
final Seed targetSeed ,
final String query ,
final CacheStrategy verify ,
final boolean global ,
final long timeout ,
final int startRecord ,
final int maximumRecords ) throws IOException {
final String address =
( targetSeed = = null | | targetSeed = = Switchboard . getSwitchboard ( ) . peers . mySeed ( ) ) ? " localhost: "
+ Switchboard . getSwitchboard ( ) . getConfig ( " port " , " 8090 " ) : targetSeed . getClusterAddress ( ) ;
2011-06-13 23:44:03 +02:00
final String urlBase = " http:// " + address + " /yacysearch.rss " ;
2011-12-06 02:24:51 +01:00
return SRURSSConnector . loadSRURSS (
urlBase ,
query ,
timeout ,
startRecord ,
maximumRecords ,
verify ,
global ,
null ) ;
2010-05-11 13:14:05 +02:00
}
2010-09-14 15:35:47 +02:00
2010-06-17 13:59:40 +02:00
public static int search (
2011-12-06 02:24:51 +01:00
final Seed mySeed ,
final String wordhashes ,
final String excludehashes ,
final String urlhashes ,
final Pattern prefer ,
final Pattern filter ,
final String modifier ,
final String language ,
final String sitehash ,
final String authorhash ,
2012-04-22 02:05:17 +02:00
final String contentdom ,
2011-12-06 02:24:51 +01:00
final int count ,
final long time ,
final int maxDistance ,
final boolean global ,
final int partitions ,
final Seed target ,
final Segment indexSegment ,
final RWIProcess containerCache ,
final SearchEvent . SecondarySearchSuperviser secondarySearchSuperviser ,
final Blacklist blacklist ,
final RankingProfile rankingProfile ,
final Bitfield constraint ) {
2005-05-07 23:11:18 +02:00
// send a search request to peer with remote Hash
2005-09-21 23:32:43 +02:00
2005-05-07 23:11:18 +02:00
// INPUT:
2007-02-01 14:27:23 +01:00
// iam : complete seed of the requesting peer
// youare : seed hash of the target peer, used for testing network stability
// key : transmission key for response
// search : a list of search words
// hsearch : a string of word hashes
// fwdep : forward depth. if "0" then peer may NOT ask another peer for more results
// fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping
// count : maximum number of wanted results
// global : if "true", then result may consist of answers from other peers
// partitions : number of remote peers that are asked (for evaluation of QPM)
// duetime : maximum time that a peer should spent to create a result
2005-09-21 23:32:43 +02:00
2007-07-05 01:48:52 +02:00
final long timestamp = System . currentTimeMillis ( ) ;
2011-12-06 15:28:48 +01:00
containerCache . addExpectedRemoteReferences ( count ) ;
2010-09-14 15:35:47 +02:00
SearchResult result ;
2007-07-05 01:48:52 +02:00
try {
2011-12-06 02:24:51 +01:00
result =
new SearchResult (
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , crypt . randomSalt ( ) ) ,
mySeed ,
wordhashes ,
excludehashes ,
urlhashes ,
prefer ,
filter ,
modifier ,
language ,
sitehash ,
authorhash ,
2012-04-22 02:05:17 +02:00
contentdom ,
2011-12-06 02:24:51 +01:00
count ,
time ,
maxDistance ,
global ,
partitions ,
target . getHexHash ( ) + " .yacyh " ,
target . getClusterAddress ( ) ,
secondarySearchSuperviser ,
rankingProfile ,
constraint ) ;
} catch ( final IOException e ) {
Network . log . logInfo ( " SEARCH failed, Peer: "
+ target . hash
+ " : "
+ target . getName ( )
+ " ( "
+ e . getMessage ( )
+ " ) " ) ;
2008-03-12 01:56:18 +01:00
//yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
2010-06-17 13:59:40 +02:00
return - 1 ;
2007-07-05 01:48:52 +02:00
}
2010-09-14 15:35:47 +02:00
// computation time
final long totalrequesttime = System . currentTimeMillis ( ) - timestamp ;
2011-06-13 23:44:03 +02:00
2011-11-24 23:45:31 +01:00
final boolean thisIsASecondarySearch = urlhashes . length ( ) > 0 ;
assert ! thisIsASecondarySearch | | secondarySearchSuperviser = = null ;
2010-09-14 15:35:47 +02:00
// create containers
final int words = wordhashes . length ( ) / Word . commonHashLength ;
assert words > 0 : " wordhashes = " + wordhashes ;
2012-07-05 01:02:51 +02:00
final List < ReferenceContainer < WordReference > > container = new ArrayList < ReferenceContainer < WordReference > > ( words ) ;
2011-12-06 02:24:51 +01:00
for ( int i = 0 ; i < words ; i + + ) {
2010-09-14 15:35:47 +02:00
try {
2012-07-05 01:02:51 +02:00
container . add ( ReferenceContainer . emptyContainer (
2011-12-06 02:24:51 +01:00
Segment . wordReferenceFactory ,
2012-07-05 01:02:51 +02:00
ASCII . getBytes ( wordhashes . substring ( i * Word . commonHashLength , ( i + 1 ) * Word . commonHashLength ) ) ,
count ) ) ;
2011-12-06 02:24:51 +01:00
} catch ( final RowSpaceExceededException e ) {
2010-01-09 01:08:16 +01:00
Log . logException ( e ) ;
2010-06-17 13:59:40 +02:00
return - 1 ;
2010-01-09 01:08:16 +01:00
}
2010-09-14 15:35:47 +02:00
}
2007-06-22 16:29:14 +02:00
2010-09-14 15:35:47 +02:00
// insert results to containers
2011-09-15 13:17:02 +02:00
int term = count ;
2011-12-06 02:24:51 +01:00
for ( final URIMetadataRow urlEntry : result . links ) {
if ( term - - < = 0 ) {
break ; // do not process more that requested (in case that evil peers fill us up with rubbish)
}
2010-09-14 15:35:47 +02:00
// get one single search result
2011-12-06 02:24:51 +01:00
if ( urlEntry = = null ) {
continue ;
}
2011-05-27 10:24:54 +02:00
assert ( urlEntry . hash ( ) . length = = 12 ) : " urlEntry.hash() = " + ASCII . String ( urlEntry . hash ( ) ) ;
2011-12-06 02:24:51 +01:00
if ( urlEntry . hash ( ) . length ! = 12 ) {
continue ; // bad url hash
2012-06-11 00:17:30 +02:00
}
if ( blacklist . isListed ( BlacklistType . SEARCH , urlEntry ) ) {
if ( Network . log . isInfo ( ) ) {
2011-12-06 02:24:51 +01:00
Network . log . logInfo ( " remote search: filtered blacklisted url "
2011-12-17 01:27:08 +01:00
+ urlEntry . url ( )
2011-12-06 02:24:51 +01:00
+ " from peer "
+ target . getName ( ) ) ;
}
2010-09-14 15:35:47 +02:00
continue ; // block with backlist
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
final String urlRejectReason =
2011-12-17 01:27:08 +01:00
Switchboard . getSwitchboard ( ) . crawlStacker . urlInAcceptedDomain ( urlEntry . url ( ) ) ;
2011-12-06 02:24:51 +01:00
if ( urlRejectReason ! = null ) {
if ( Network . log . isInfo ( ) ) {
Network . log . logInfo ( " remote search: rejected url ' "
2011-12-17 01:27:08 +01:00
+ urlEntry . url ( )
2011-12-06 02:24:51 +01:00
+ " ' ( "
+ urlRejectReason
+ " ) from peer "
+ target . getName ( ) ) ;
}
2007-07-24 02:46:17 +02:00
continue ; // reject url outside of our domain
}
2006-12-05 03:47:51 +01:00
2010-09-14 15:35:47 +02:00
// save the url entry
2011-06-13 23:44:03 +02:00
final Reference entry = urlEntry . word ( ) ;
2011-12-06 02:24:51 +01:00
if ( entry = = null ) {
if ( Network . log . isWarning ( ) ) {
Network . log . logWarning ( " remote search: no word attached from peer "
+ target . getName ( )
+ " , version "
+ target . getVersion ( ) ) ;
}
2010-09-14 15:35:47 +02:00
continue ; // no word attached
}
2005-09-21 23:32:43 +02:00
2010-09-14 15:35:47 +02:00
// the search-result-url transports all the attributes of word indexes
2011-12-06 02:24:51 +01:00
if ( ! Base64Order . enhancedCoder . equal ( entry . urlhash ( ) , urlEntry . hash ( ) ) ) {
Network . log . logInfo ( " remote search: url-hash "
+ ASCII . String ( urlEntry . hash ( ) )
+ " does not belong to word-attached-hash "
+ ASCII . String ( entry . urlhash ( ) )
+ " ; url = "
2011-12-17 01:27:08 +01:00
+ urlEntry . url ( )
2011-12-06 02:24:51 +01:00
+ " from peer "
+ target . getName ( ) ) ;
2010-09-14 15:35:47 +02:00
continue ; // spammed
}
2005-05-07 23:11:18 +02:00
2010-09-14 15:35:47 +02:00
// passed all checks, store url
try {
indexSegment . urlMetadata ( ) . store ( urlEntry ) ;
2011-12-06 02:24:51 +01:00
ResultURLs . stack (
urlEntry ,
mySeed . hash . getBytes ( ) ,
UTF8 . getBytes ( target . hash ) ,
EventOrigin . QUERIES ) ;
} catch ( final IOException e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " could not store search result " , e ) ;
2010-09-14 15:35:47 +02:00
continue ; // db-error
}
2007-07-05 01:48:52 +02:00
2011-12-06 02:24:51 +01:00
if ( urlEntry . snippet ( ) ! = null
& & urlEntry . snippet ( ) . length ( ) > 0
& & ! urlEntry . snippet ( ) . equals ( " null " ) ) {
2010-09-14 15:35:47 +02:00
// we don't store the snippets along the url entry,
2007-08-15 13:36:59 +02:00
// because they are search-specific.
2010-09-14 15:35:47 +02:00
// instead, they are placed in a snipped-search cache.
2010-09-21 23:48:42 +02:00
// System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
2011-05-27 10:24:54 +02:00
TextSnippet . snippetsCache . put ( wordhashes , ASCII . String ( urlEntry . hash ( ) ) , urlEntry . snippet ( ) ) ;
2010-09-14 15:35:47 +02:00
}
2011-06-13 23:44:03 +02:00
2010-09-14 15:35:47 +02:00
// add the url entry to the word indexes
2012-07-05 01:02:51 +02:00
for ( final ReferenceContainer < WordReference > c : container ) {
2010-09-14 15:35:47 +02:00
try {
2012-07-05 01:02:51 +02:00
c . add ( entry ) ;
2011-12-06 02:24:51 +01:00
} catch ( final RowSpaceExceededException e ) {
2009-12-10 00:27:26 +01:00
Log . logException ( e ) ;
break ;
}
2010-09-14 15:35:47 +02:00
}
}
2007-07-05 01:48:52 +02:00
2007-09-04 01:43:55 +02:00
// store remote result to local result container
2010-09-10 00:42:54 +02:00
// insert one container into the search result buffer
2010-09-14 15:35:47 +02:00
// one is enough, only the references are used, not the word
2012-07-05 01:02:51 +02:00
containerCache . add ( container . get ( 0 ) , false , target . getName ( ) + " / " + target . hash , result . joincount , true , time ) ;
2011-12-06 15:28:48 +01:00
containerCache . addExpectedRemoteReferences ( - count ) ;
2010-09-10 00:42:54 +02:00
2010-06-17 13:59:40 +02:00
// insert the containers to the index
2011-12-06 02:24:51 +01:00
for ( final ReferenceContainer < WordReference > c : container ) {
try {
indexSegment . termIndex ( ) . add ( c ) ;
} catch ( final Exception e ) {
Log . logException ( e ) ;
}
2010-06-17 13:59:40 +02:00
}
2010-09-14 15:35:47 +02:00
2011-12-06 02:24:51 +01:00
Network . log . logInfo ( " remote search: peer "
+ target . getName ( )
+ " sent "
2012-07-05 01:02:51 +02:00
+ container . get ( 0 ) . size ( )
2011-12-06 02:24:51 +01:00
+ " / "
+ result . joincount
+ " references for "
+ ( thisIsASecondarySearch ? " a secondary search " : " joined word queries " ) ) ;
2010-06-17 13:59:40 +02:00
// integrate remote top-words/topics
2011-12-06 02:24:51 +01:00
if ( result . references ! = null & & result . references . length > 0 ) {
Network . log . logInfo ( " remote search: peer "
+ target . getName ( )
+ " sent "
+ result . references . length
+ " topics " ) ;
2010-06-17 13:59:40 +02:00
// add references twice, so they can be counted (must have at least 2 entries)
2011-12-06 02:24:51 +01:00
synchronized ( containerCache ) {
2010-09-14 15:35:47 +02:00
containerCache . addTopic ( result . references ) ;
containerCache . addTopic ( result . references ) ;
2007-09-08 13:50:19 +02:00
}
2007-09-04 01:43:55 +02:00
}
2011-06-13 23:44:03 +02:00
2010-09-14 15:35:47 +02:00
// read index abstract
2011-12-06 02:24:51 +01:00
if ( secondarySearchSuperviser ! = null ) {
2010-09-14 15:35:47 +02:00
String wordhash ;
String whacc = " " ;
ByteBuffer ci ;
int ac = 0 ;
2011-12-06 02:24:51 +01:00
for ( final Map . Entry < byte [ ] , String > abstractEntry : result . indexabstract . entrySet ( ) ) {
2010-09-14 15:35:47 +02:00
try {
2011-03-05 20:52:34 +01:00
ci = new ByteBuffer ( abstractEntry . getValue ( ) ) ;
2011-05-27 10:24:54 +02:00
wordhash = ASCII . String ( abstractEntry . getKey ( ) ) ;
2011-12-06 02:24:51 +01:00
} catch ( final OutOfMemoryError e ) {
2010-09-14 15:35:47 +02:00
Log . logException ( e ) ;
2011-03-05 20:52:34 +01:00
continue ;
2010-09-14 15:35:47 +02:00
}
2011-03-05 20:52:34 +01:00
whacc + = wordhash ;
2011-12-06 02:24:51 +01:00
secondarySearchSuperviser . addAbstract (
wordhash ,
WordReferenceFactory . decompressIndex ( ci , target . hash ) ) ;
2010-09-14 15:35:47 +02:00
ac + + ;
2011-06-13 23:44:03 +02:00
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
if ( ac > 0 ) {
2010-09-14 15:35:47 +02:00
secondarySearchSuperviser . commitAbstract ( ) ;
2011-12-06 02:24:51 +01:00
Network . log . logInfo ( " remote search: peer "
+ target . getName ( )
+ " sent "
+ ac
+ " index abstracts for words "
+ whacc ) ;
2010-09-14 15:35:47 +02:00
}
}
2011-06-13 23:44:03 +02:00
2008-02-03 13:40:40 +01:00
// generate statistics
2011-12-06 02:24:51 +01:00
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " SEARCH "
+ result . urlcount
+ " URLS FROM "
+ target . hash
+ " : "
+ target . getName ( )
+ " , searchtime= "
+ result . searchtime
+ " , netdelay= "
+ ( totalrequesttime - result . searchtime )
+ " , references= "
+ result . references ) ;
}
2010-09-14 15:35:47 +02:00
return result . urlcount ;
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
public static class SearchResult
{
2011-06-13 23:44:03 +02:00
2010-09-14 15:35:47 +02:00
public String version ; // version : application version of responder
public String uptime ; // uptime : uptime in seconds of responder
public String fwhop ; // hops (depth) of forwards that had been performed to construct this result
public String fwsrc ; // peers that helped to construct this result
public String fwrec ; // peers that would have helped to construct this result (recommendations)
public int urlcount ; // number of returned LURL's for this search
public int joincount ; //
public Map < byte [ ] , Integer > indexcount ; //
public long searchtime ; // time that the peer actually spent to create the result
public String [ ] references ; // search hints, the top-words
public List < URIMetadataRow > links ; // LURLs of search
public Map < byte [ ] , String > indexabstract ; // index abstracts, a collection of url-hashes per word
2011-06-13 23:44:03 +02:00
2010-09-15 13:38:03 +02:00
public SearchResult (
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts ,
final Seed mySeed ,
final String wordhashes ,
final String excludehashes ,
final String urlhashes ,
final Pattern prefer ,
final Pattern filter ,
final String modifier ,
final String language ,
final String sitehash ,
final String authorhash ,
2012-04-22 02:05:17 +02:00
final String contentdom ,
2011-12-06 02:24:51 +01:00
final int count ,
final long time ,
final int maxDistance ,
final boolean global ,
final int partitions ,
final String hostname ,
final String hostaddress ,
final SearchEvent . SecondarySearchSuperviser secondarySearchSuperviser ,
final RankingProfile rankingProfile ,
final Bitfield constraint ) throws IOException {
2010-09-15 13:38:03 +02:00
// send a search request to peer with remote Hash
2011-11-23 23:21:14 +01:00
//if (hostaddress.equals(mySeed.getClusterAddress())) hostaddress = "127.0.0.1:" + mySeed.getPort(); // for debugging
2010-09-15 13:38:03 +02:00
// INPUT:
// iam : complete seed of the requesting peer
// youare : seed hash of the target peer, used for testing network stability
// key : transmission key for response
// search : a list of search words
// hsearch : a string of word hashes
// fwdep : forward depth. if "0" then peer may NOT ask another peer for more results
// fwden : forward deny, a list of seed hashes. They may NOT be target of forward hopping
// count : maximum number of wanted results
// global : if "true", then result may consist of answers from other peers
// partitions : number of remote peers that are asked (for evaluation of QPM)
// duetime : maximum time that a peer should spent to create a result
2011-06-13 23:44:03 +02:00
2010-09-15 13:38:03 +02:00
// send request
Map < String , String > resultMap = null ;
2011-02-28 07:28:29 +01:00
String key = " " ;
2011-06-13 23:44:03 +02:00
final ContentBody keyBody = parts . get ( " key " ) ;
2011-12-06 02:24:51 +01:00
if ( keyBody ! = null ) {
2011-06-13 23:44:03 +02:00
final ByteArrayOutputStream baos = new ByteArrayOutputStream ( 20 ) ;
2011-02-28 07:28:29 +01:00
keyBody . writeTo ( baos ) ;
key = baos . toString ( ) ;
}
parts . put ( " myseed " , UTF8 . StringBody ( ( mySeed = = null ) ? " " : mySeed . genSeedStr ( key ) ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " count " , UTF8 . StringBody ( Integer . toString ( Math . max ( 10 , count ) ) ) ) ;
2011-03-04 14:44:00 +01:00
parts . put ( " time " , UTF8 . StringBody ( Long . toString ( Math . max ( 3000 , time ) ) ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " resource " , UTF8 . StringBody ( ( ( global ) ? " global " : " local " ) ) ) ;
parts . put ( " partitions " , UTF8 . StringBody ( Integer . toString ( partitions ) ) ) ;
parts . put ( " query " , UTF8 . StringBody ( wordhashes ) ) ;
parts . put ( " exclude " , UTF8 . StringBody ( excludehashes ) ) ;
parts . put ( " duetime " , UTF8 . StringBody ( " 1000 " ) ) ;
parts . put ( " urls " , UTF8 . StringBody ( urlhashes ) ) ;
2011-05-06 00:37:06 +02:00
parts . put ( " prefer " , UTF8 . StringBody ( prefer . pattern ( ) ) ) ;
parts . put ( " filter " , UTF8 . StringBody ( filter . pattern ( ) ) ) ;
2011-11-26 14:40:33 +01:00
parts . put ( " modifier " , UTF8 . StringBody ( modifier ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " language " , UTF8 . StringBody ( language ) ) ;
parts . put ( " sitehash " , UTF8 . StringBody ( sitehash ) ) ;
parts . put ( " authorhash " , UTF8 . StringBody ( authorhash ) ) ;
2012-04-22 02:05:17 +02:00
parts . put ( " contentdom " , UTF8 . StringBody ( contentdom ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " ttl " , UTF8 . StringBody ( " 0 " ) ) ;
parts . put ( " maxdist " , UTF8 . StringBody ( Integer . toString ( maxDistance ) ) ) ;
parts . put ( " profile " , UTF8 . StringBody ( crypt . simpleEncode ( rankingProfile . toExternalString ( ) ) ) ) ;
parts . put ( " constraint " , UTF8 . StringBody ( ( constraint = = null ) ? " " : constraint . exportB64 ( ) ) ) ;
2011-12-06 02:24:51 +01:00
if ( secondarySearchSuperviser ! = null ) {
parts . put ( " abstracts " , UTF8 . StringBody ( " auto " ) ) ;
// resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
}
2010-09-15 13:38:03 +02:00
2011-12-02 02:36:03 +01:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , 8000 ) ;
2012-02-25 14:07:02 +01:00
byte [ ] a = httpClient . POSTbytes ( new MultiProtocolURI ( " http:// " + hostaddress + " /yacy/search.html " ) , hostname , parts , false ) ;
if ( a ! = null & & a . length > 200000 ) {
// there is something wrong. This is too large, maybe a hack on the other side?
a = null ;
}
resultMap = FileUtils . table ( a ) ;
2011-06-13 23:44:03 +02:00
2010-09-15 13:38:03 +02:00
// evaluate request result
2011-12-06 02:24:51 +01:00
if ( resultMap = = null | | resultMap . isEmpty ( ) ) {
throw new IOException ( " resultMap is NULL " ) ;
}
2010-09-14 15:35:47 +02:00
try {
this . searchtime = Integer . parseInt ( resultMap . get ( " searchtime " ) ) ;
2011-12-06 02:24:51 +01:00
} catch ( final NumberFormatException e ) {
throw new IOException ( " wrong output format for searchtime: "
+ e . getMessage ( )
+ " , map = "
+ resultMap . toString ( ) ) ;
2010-09-14 15:35:47 +02:00
}
try {
this . joincount = Integer . parseInt ( resultMap . get ( " joincount " ) ) ; // the complete number of hits at remote site
2011-12-06 02:24:51 +01:00
} catch ( final NumberFormatException e ) {
2010-09-14 15:35:47 +02:00
throw new IOException ( " wrong output format for joincount: " + e . getMessage ( ) ) ;
}
try {
2011-12-06 02:24:51 +01:00
this . urlcount = Integer . parseInt ( resultMap . get ( " count " ) ) ; // the number of hits that are returned in the result list
} catch ( final NumberFormatException e ) {
2010-09-14 15:35:47 +02:00
throw new IOException ( " wrong output format for count: " + e . getMessage ( ) ) ;
}
this . fwhop = resultMap . get ( " fwhop " ) ;
this . fwsrc = resultMap . get ( " fwsrc " ) ;
this . fwrec = resultMap . get ( " fwrec " ) ;
// scan the result map for entries with special prefix
2011-06-13 23:44:03 +02:00
this . indexcount = new TreeMap < byte [ ] , Integer > ( Base64Order . enhancedCoder ) ;
this . indexabstract = new TreeMap < byte [ ] , String > ( Base64Order . enhancedCoder ) ;
2011-12-06 02:24:51 +01:00
for ( final Map . Entry < String , String > entry : resultMap . entrySet ( ) ) {
if ( entry . getKey ( ) . startsWith ( " indexcount. " ) ) {
this . indexcount . put (
UTF8 . getBytes ( entry . getKey ( ) . substring ( 11 ) ) ,
Integer . parseInt ( entry . getValue ( ) ) ) ;
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
if ( entry . getKey ( ) . startsWith ( " indexabstract. " ) ) {
2011-06-13 23:44:03 +02:00
this . indexabstract . put ( UTF8 . getBytes ( entry . getKey ( ) . substring ( 14 ) ) , entry . getValue ( ) ) ;
2010-09-14 15:35:47 +02:00
}
}
2011-06-13 23:44:03 +02:00
this . references = resultMap . get ( " references " ) . split ( " , " ) ;
2010-09-14 15:35:47 +02:00
this . links = new ArrayList < URIMetadataRow > ( this . urlcount ) ;
2011-12-06 02:24:51 +01:00
for ( int n = 0 ; n < this . urlcount ; n + + ) {
2010-09-14 15:35:47 +02:00
// get one single search result
2011-06-13 23:44:03 +02:00
final String resultLine = resultMap . get ( " resource " + n ) ;
2011-12-06 02:24:51 +01:00
if ( resultLine = = null ) {
continue ;
}
2011-06-13 23:44:03 +02:00
final URIMetadataRow urlEntry = URIMetadataRow . importEntry ( resultLine ) ;
2011-12-06 02:24:51 +01:00
if ( urlEntry = = null ) {
continue ;
}
2010-09-14 15:35:47 +02:00
this . links . add ( urlEntry ) ;
}
}
}
2011-06-13 23:44:03 +02:00
2011-10-04 11:06:24 +02:00
public static Map < String , String > permissionMessage ( final SeedDB seedDB , final String targetHash ) {
2005-05-07 23:11:18 +02:00
// ask for allowed message size and attachement size
// if this replies null, the peer does not answer
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-05-07 23:11:18 +02:00
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , targetHash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " process " , UTF8 . StringBody ( " permission " ) ) ;
2010-07-15 02:59:53 +02:00
final byte [ ] content = postToFile ( seedDB , targetHash , " message.html " , parts , 5000 ) ;
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( content ) ;
2007-07-05 01:48:52 +02:00
return result ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2005-05-07 23:11:18 +02:00
// most probably a network time-out exception
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.permissionMessage error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return null ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2011-12-06 02:24:51 +01:00
public static Map < String , String > postMessage (
final SeedDB seedDB ,
final String targetHash ,
final String subject ,
final byte [ ] message ) {
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2007-08-08 20:23:45 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-11-11 00:48:20 +01:00
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , targetHash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " process " , UTF8 . StringBody ( " post " ) ) ;
parts . put ( " myseed " , UTF8 . StringBody ( seedDB . mySeed ( ) . genSeedStr ( salt ) ) ) ;
parts . put ( " subject " , UTF8 . StringBody ( subject ) ) ;
2011-03-07 21:36:40 +01:00
parts . put ( " message " , UTF8 . StringBody ( message ) ) ;
2010-07-15 02:59:53 +02:00
final byte [ ] content = postToFile ( seedDB , targetHash , " message.html " , parts , 20000 ) ;
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( content ) ;
2007-07-05 01:48:52 +02:00
return result ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.postMessage error: " + e . getMessage ( ) ) ;
2005-11-11 00:48:20 +01:00
return null ;
}
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
public static Map < String , String > crawlReceipt (
final Seed mySeed ,
final Seed target ,
final String process ,
final String result ,
final String reason ,
final URIMetadataRow entry ,
final String wordhashes ) {
2007-07-05 01:48:52 +02:00
assert ( target ! = null ) ;
2008-05-06 01:13:47 +02:00
assert ( mySeed ! = null ) ;
assert ( mySeed ! = target ) ;
2005-09-21 23:32:43 +02:00
2005-04-07 21:19:42 +02:00
/ *
the result can have one of the following values :
negative cases , no retry
unavailable - the resource is not avaiable ( a broken link ) ; not found or interrupted
robot - a robot - file has denied to crawl that resource
2011-06-13 23:44:03 +02:00
2005-04-07 21:19:42 +02:00
negative cases , retry possible
rejected - the peer has rejected to load the resource
dequeue - peer too busy - rejected to crawl
2011-06-13 23:44:03 +02:00
2005-04-07 21:19:42 +02:00
positive cases with crawling
fill - the resource was loaded and processed
update - the resource was already in database but re - loaded and processed
2011-06-13 23:44:03 +02:00
2005-05-07 23:11:18 +02:00
positive cases without crawling
2005-04-07 21:19:42 +02:00
known - the resource is already in database , believed to be fresh and not reloaded
stale - the resource was reloaded but not processed because source had no changes
2011-06-13 23:44:03 +02:00
2005-05-07 23:11:18 +02:00
* /
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// prepare request
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2005-10-22 15:28:04 +02:00
// determining target address
2007-07-05 01:48:52 +02:00
final String address = target . getClusterAddress ( ) ;
2011-12-06 02:24:51 +01:00
if ( address = = null ) {
return null ;
}
2011-06-13 23:44:03 +02:00
2007-07-05 01:48:52 +02:00
// send request
2005-04-07 21:19:42 +02:00
try {
2010-07-14 00:10:24 +02:00
// prepare request
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " process " , UTF8 . StringBody ( process ) ) ;
2011-05-27 10:24:54 +02:00
parts . put ( " urlhash " , UTF8 . StringBody ( ( ( entry = = null ) ? " " : ASCII . String ( entry . hash ( ) ) ) ) ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " result " , UTF8 . StringBody ( result ) ) ;
parts . put ( " reason " , UTF8 . StringBody ( reason ) ) ;
parts . put ( " wordh " , UTF8 . StringBody ( wordhashes ) ) ;
2011-12-06 02:24:51 +01:00
parts . put (
" lurlEntry " ,
UTF8 . StringBody ( ( ( entry = = null ) ? " " : crypt . simpleEncode ( entry . toString ( ) , salt ) ) ) ) ;
2010-07-14 00:10:24 +02:00
// send request
2011-04-26 13:46:31 +02:00
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
2011-04-26 15:35:29 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , 10000 ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] content =
httpClient . POSTbytes (
new MultiProtocolURI ( " http:// " + address + " /yacy/crawlReceipt.html " ) ,
target . getHexHash ( ) + " .yacyh " ,
parts ,
false ) ;
2009-10-05 22:11:41 +02:00
return FileUtils . table ( content ) ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2005-05-07 23:11:18 +02:00
// most probably a network time-out exception
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.crawlReceipt error: " + e . getMessage ( ) ) ;
2005-05-07 23:11:18 +02:00
return null ;
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2009-02-16 22:28:48 +01:00
/ * *
2011-12-06 02:24:51 +01:00
* transfer the index . If the transmission fails , return a string describing the cause . If everything is
* ok , return null .
2012-02-25 14:07:02 +01:00
*
2009-02-16 22:28:48 +01:00
* @param targetSeed
* @param indexes
* @param urlCache
* @param gzipBody
* @param timeout
* @return
* /
public static String transferIndex (
2011-12-06 02:24:51 +01:00
final Seed targetSeed ,
final ReferenceContainerCache < WordReference > indexes ,
2012-07-22 13:18:45 +02:00
final SortedMap < byte [ ] , URIMetadata > urlCache ,
2011-12-06 02:24:51 +01:00
final boolean gzipBody ,
final int timeout ) {
2011-02-25 14:26:09 +01:00
// check if we got all necessary urls in the urlCache (only for debugging)
Iterator < WordReference > eenum ;
Reference entry ;
2011-12-06 02:24:51 +01:00
for ( final ReferenceContainer < WordReference > ic : indexes ) {
2011-02-25 14:26:09 +01:00
eenum = ic . entries ( ) ;
2011-12-06 02:24:51 +01:00
while ( eenum . hasNext ( ) ) {
2011-02-25 14:26:09 +01:00
entry = eenum . next ( ) ;
2011-12-06 02:24:51 +01:00
if ( urlCache . get ( entry . urlhash ( ) ) = = null ) {
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " DEBUG transferIndex: to-send url hash ' "
+ ASCII . String ( entry . urlhash ( ) )
+ " ' is not contained in urlCache " ) ;
}
2006-06-14 11:40:42 +02:00
}
}
2011-06-13 23:44:03 +02:00
}
2011-02-25 14:26:09 +01:00
// transfer the RWI without the URLs
Map < String , String > in = transferRWI ( targetSeed , indexes , gzipBody , timeout ) ;
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( in = = null ) {
2011-02-25 14:26:09 +01:00
return " no connection from transferRWI " ;
}
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
String result = in . get ( " result " ) ;
2011-12-06 02:24:51 +01:00
if ( result = = null ) {
2011-02-25 14:26:09 +01:00
return " no result from transferRWI " ;
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( ! ( result . equals ( " ok " ) ) ) {
2011-02-25 14:26:09 +01:00
return result ;
}
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
// in now contains a list of unknown hashes
String uhss = in . get ( " unknownURL " ) ;
2011-12-06 02:24:51 +01:00
if ( uhss = = null ) {
2011-02-25 14:26:09 +01:00
return " no unknownURL tag in response " ;
}
2011-12-06 02:24:51 +01:00
EventChannel
. channels ( EventChannel . DHTSEND )
. addMessage (
new RSSMessage (
" Sent " + indexes . size ( ) + " RWIs to " + targetSeed . getName ( ) ,
" " ,
targetSeed . hash ) ) ;
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
uhss = uhss . trim ( ) ;
2012-07-10 22:59:03 +02:00
if ( uhss . isEmpty ( ) | | uhss . equals ( " , " ) ) {
2011-12-06 02:24:51 +01:00
return null ;
} // all url's known, we are ready here
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
final String [ ] uhs = uhss . split ( " , " ) ;
2011-12-06 02:24:51 +01:00
if ( uhs . length = = 0 ) {
return null ;
} // all url's known
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
// extract the urlCache from the result
2012-07-22 13:18:45 +02:00
final URIMetadata [ ] urls = new URIMetadataRow [ uhs . length ] ;
2011-12-06 02:24:51 +01:00
for ( int i = 0 ; i < uhs . length ; i + + ) {
2011-05-27 10:24:54 +02:00
urls [ i ] = urlCache . get ( ASCII . getBytes ( uhs [ i ] ) ) ;
2011-12-06 02:24:51 +01:00
if ( urls [ i ] = = null ) {
if ( Network . log . isFine ( ) ) {
Network . log . logFine ( " DEBUG transferIndex: requested url hash ' "
+ uhs [ i ]
+ " ', unknownURL=' "
+ uhss
+ " ' " ) ;
}
2006-06-14 11:40:42 +02:00
}
2005-04-07 21:19:42 +02:00
}
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
in = transferURL ( targetSeed , urls , gzipBody , timeout ) ;
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( in = = null ) {
2011-02-25 14:26:09 +01:00
return " no connection from transferURL " ;
}
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
result = in . get ( " result " ) ;
2011-12-06 02:24:51 +01:00
if ( result = = null ) {
2011-02-25 14:26:09 +01:00
return " no result from transferURL " ;
}
2011-06-13 23:44:03 +02:00
2011-12-06 02:24:51 +01:00
if ( ! result . equals ( " ok " ) ) {
2011-02-25 14:26:09 +01:00
return result ;
2011-06-13 23:44:03 +02:00
}
2011-12-06 02:24:51 +01:00
EventChannel . channels ( EventChannel . DHTSEND ) . addMessage (
new RSSMessage (
" Sent " + uhs . length + " URLs to peer " + targetSeed . getName ( ) ,
" " ,
targetSeed . hash ) ) ;
2011-06-13 23:44:03 +02:00
2011-02-25 14:26:09 +01:00
return null ;
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2010-06-01 15:02:11 +02:00
private static Map < String , String > transferRWI (
2011-12-06 02:24:51 +01:00
final Seed targetSeed ,
final ReferenceContainerCache < WordReference > indexes ,
boolean gzipBody ,
final int timeout ) {
2007-04-30 00:05:34 +02:00
final String address = targetSeed . getPublicAddress ( ) ;
2011-12-06 02:24:51 +01:00
if ( address = = null ) {
Network . log . logWarning ( " no address for transferRWI " ) ;
return null ;
}
2007-07-05 01:48:52 +02:00
2005-04-07 21:19:42 +02:00
// prepare post values
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2005-09-22 12:30:55 +02:00
// enabling gzip compression for post request body
2011-12-06 02:24:51 +01:00
if ( gzipBody & & ( targetSeed . getVersion ( ) < yacyVersion . YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED ) ) {
2008-05-04 12:53:04 +02:00
gzipBody = false ;
}
2011-06-13 23:44:03 +02:00
2005-04-07 21:19:42 +02:00
int indexcount = 0 ;
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
final StringBuilder entrypost = new StringBuilder ( indexes . size ( ) * 73 ) ;
2009-04-15 08:34:27 +02:00
Iterator < WordReference > eenum ;
2009-03-02 00:58:14 +01:00
Reference entry ;
2011-12-06 02:24:51 +01:00
for ( final ReferenceContainer < WordReference > ic : indexes ) {
replaced old DHT transmission method with new method. Many things have changed! some of them:
- after a index selection is made, the index is splitted into its vertical components
- from differrent index selctions the splitted components can be accumulated before they are placed into the transmission queue
- each splitted chunk gets its own transmission thread
- multiple transmission threads are started concurrently
- the process can be monitored with the blocking queue servlet
To implement that, a new package de.anomic.yacy.dht was created. Some old files have been removed.
The new index distribution model using a vertical DHT was implemented. An abstraction of this model
is implemented in the new dht package as interface. The freeworld network has now a configuration
of two vertial partitions; sixteen partitions are planned and will be configured if the process is bug-free.
This modification has three main targets:
- enhance the DHT transmission speed
- with a vertical DHT, a search will speed up. With two partitions, two times. With sixteen, sixteen times.
- the vertical DHT will apply a semi-dht for URLs, and peers will receive a fraction of the overall URLs they received before.
with two partitions, the fractions will be halve. With sixteen partitions, a 1/16 of the previous number of URLs.
BE CAREFULL, THIS IS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-02-10 01:06:59 +01:00
eenum = ic . entries ( ) ;
2011-12-06 02:24:51 +01:00
while ( eenum . hasNext ( ) ) {
2008-06-06 18:01:27 +02:00
entry = eenum . next ( ) ;
2011-12-06 02:24:51 +01:00
entrypost
. append ( ASCII . String ( ic . getTermHash ( ) ) )
. append ( entry . toPropertyForm ( ) )
. append ( serverCore . CRLF_STRING ) ;
2005-08-13 00:14:24 +02:00
indexcount + + ;
2005-04-07 21:19:42 +02:00
}
}
2005-09-21 23:32:43 +02:00
2011-12-06 02:24:51 +01:00
if ( indexcount = = 0 ) {
2005-08-13 00:14:24 +02:00
// nothing to do but everything ok
2010-11-28 03:57:31 +01:00
final Map < String , String > result = new HashMap < String , String > ( 2 ) ;
2005-08-13 00:14:24 +02:00
result . put ( " result " , " ok " ) ;
result . put ( " unknownURL " , " " ) ;
return result ;
2005-04-07 21:19:42 +02:00
}
2005-05-07 23:11:18 +02:00
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , targetSeed . hash , salt ) ;
2011-02-25 14:26:09 +01:00
parts . put ( " wordc " , UTF8 . StringBody ( Integer . toString ( indexes . size ( ) ) ) ) ;
parts . put ( " entryc " , UTF8 . StringBody ( Integer . toString ( indexcount ) ) ) ;
parts . put ( " indexes " , UTF8 . StringBody ( entrypost . toString ( ) ) ) ;
2011-04-26 13:46:31 +02:00
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
2011-04-26 15:35:29 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , timeout ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] content =
httpClient . POSTbytes (
new MultiProtocolURI ( " http:// " + address + " /yacy/transferRWI.html " ) ,
targetSeed . getHexHash ( ) + " .yacyh " ,
parts ,
gzipBody ) ;
2009-10-05 22:11:41 +02:00
final Iterator < String > v = FileUtils . strings ( content ) ;
2008-05-04 12:53:04 +02:00
// this should return a list of urlhashes that are unknown
2011-06-13 23:44:03 +02:00
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( v ) ;
2006-06-14 11:40:42 +02:00
// return the transfered index data in bytes (for debugging only)
2008-01-17 19:43:01 +01:00
result . put ( " indexPayloadSize " , Integer . toString ( entrypost . length ( ) ) ) ;
2005-04-07 21:19:42 +02:00
return result ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logInfo ( " yacyClient.transferRWI to " + address + " error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2011-12-06 02:24:51 +01:00
private static Map < String , String > transferURL (
final Seed targetSeed ,
2012-07-22 13:18:45 +02:00
final URIMetadata [ ] urls ,
2011-12-06 02:24:51 +01:00
boolean gzipBody ,
final int timeout ) {
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2007-04-30 00:05:34 +02:00
final String address = targetSeed . getPublicAddress ( ) ;
2011-12-06 02:24:51 +01:00
if ( address = = null ) {
return null ;
}
2007-07-05 01:48:52 +02:00
2005-04-07 21:19:42 +02:00
// prepare post values
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , targetSeed . hash , salt ) ;
2011-06-13 23:44:03 +02:00
2005-09-22 12:30:55 +02:00
// enabling gzip compression for post request body
2011-12-06 02:24:51 +01:00
if ( gzipBody & & ( targetSeed . getVersion ( ) < yacyVersion . YACY_SUPPORTS_GZIP_POST_REQUESTS_CHUNKED ) ) {
2008-05-04 12:53:04 +02:00
gzipBody = false ;
}
2011-06-13 23:44:03 +02:00
2010-04-08 02:11:32 +02:00
String resource ;
2005-04-07 21:19:42 +02:00
int urlc = 0 ;
2006-06-14 11:40:42 +02:00
int urlPayloadSize = 0 ;
2012-07-22 13:18:45 +02:00
for ( final URIMetadata url : urls ) {
2011-12-06 02:24:51 +01:00
if ( url ! = null ) {
2011-06-13 23:44:03 +02:00
resource = url . toString ( ) ;
2010-04-08 02:11:32 +02:00
//System.out.println("*** DEBUG resource = " + resource);
2011-12-06 02:24:51 +01:00
if ( resource ! = null & & resource . indexOf ( 0 ) = = - 1 ) {
2011-02-25 14:26:09 +01:00
parts . put ( " url " + urlc , UTF8 . StringBody ( resource ) ) ;
2006-06-14 11:40:42 +02:00
urlPayloadSize + = resource . length ( ) ;
2005-04-07 21:19:42 +02:00
urlc + + ;
}
}
}
2005-05-07 23:11:18 +02:00
try {
2011-02-25 14:26:09 +01:00
parts . put ( " urlc " , UTF8 . StringBody ( Integer . toString ( urlc ) ) ) ;
2011-04-26 13:46:31 +02:00
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
2011-04-26 15:35:29 +02:00
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , timeout ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] content =
httpClient . POSTbytes (
new MultiProtocolURI ( " http:// " + address + " /yacy/transferURL.html " ) ,
targetSeed . getHexHash ( ) + " .yacyh " ,
parts ,
gzipBody ) ;
2009-10-05 22:11:41 +02:00
final Iterator < String > v = FileUtils . strings ( content ) ;
2011-06-13 23:44:03 +02:00
2010-06-01 15:02:11 +02:00
final Map < String , String > result = FileUtils . table ( v ) ;
2006-06-14 11:40:42 +02:00
// return the transfered url data in bytes (for debugging only)
2011-06-13 23:44:03 +02:00
result . put ( " urlPayloadSize " , Integer . toString ( urlPayloadSize ) ) ;
2006-06-14 11:40:42 +02:00
return result ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.transferURL to " + address + " error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2005-09-21 23:32:43 +02:00
2011-10-04 11:06:24 +02:00
public static Map < String , String > getProfile ( final Seed targetSeed ) {
2011-05-18 16:26:28 +02:00
// ReferenceContainerCache<HostReference> ref = loadIDXHosts(targetSeed);
2011-06-13 23:44:03 +02:00
2005-05-07 23:11:18 +02:00
// this post a message to the remote message board
2008-04-12 10:12:51 +02:00
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2007-04-30 00:05:34 +02:00
String address = targetSeed . getClusterAddress ( ) ;
2011-12-06 02:24:51 +01:00
if ( address = = null ) {
address = " localhost:8090 " ;
}
2005-05-07 23:11:18 +02:00
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , targetSeed . hash , salt ) ;
2011-04-26 13:46:31 +02:00
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
2011-04-26 15:35:29 +02:00
final HTTPClient httpclient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , 5000 ) ;
2011-12-06 02:24:51 +01:00
final byte [ ] content =
httpclient . POSTbytes (
new MultiProtocolURI ( " http:// " + address + " /yacy/profile.html " ) ,
targetSeed . getHexHash ( ) + " .yacyh " ,
parts ,
false ) ;
2009-10-05 22:11:41 +02:00
return FileUtils . table ( content ) ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.getProfile error: " + e . getMessage ( ) ) ;
2005-04-07 21:19:42 +02:00
return null ;
}
}
2011-06-13 23:44:03 +02:00
2011-10-04 11:06:24 +02:00
public static ReferenceContainerCache < HostReference > loadIDXHosts ( final Seed target ) {
2011-12-06 02:24:51 +01:00
final ReferenceContainerCache < HostReference > index =
new ReferenceContainerCache < HostReference > (
WebStructureGraph . hostReferenceFactory ,
Base64Order . enhancedCoder ,
6 ) ;
2011-05-18 16:26:28 +02:00
// check if the host supports this protocol
2011-12-06 02:24:51 +01:00
if ( target . getRevision ( ) < migration . IDX_HOST ) {
2011-05-18 16:26:28 +02:00
// if the protocol is not supported then we just return an empty host reference container
return index ;
}
2011-06-13 23:44:03 +02:00
2011-05-18 16:26:28 +02:00
// prepare request
final String salt = crypt . randomSalt ( ) ;
2011-06-13 23:44:03 +02:00
2011-05-18 16:26:28 +02:00
// send request
try {
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > parts =
basicRequestParts ( Switchboard . getSwitchboard ( ) , target . hash , salt ) ;
2011-05-18 16:26:28 +02:00
parts . put ( " object " , UTF8 . StringBody ( " host " ) ) ;
final byte [ ] content = postToFile ( target , " idx.json " , parts , 30000 ) ;
2011-12-06 02:24:51 +01:00
if ( content = = null | | content . length = = 0 ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.loadIDXHosts error: empty result " ) ;
2011-05-18 16:26:28 +02:00
return null ;
}
2011-12-06 02:24:51 +01:00
final JSONObject json =
new JSONObject ( new JSONTokener ( new InputStreamReader ( new ByteArrayInputStream ( content ) ) ) ) ;
2011-05-18 16:26:28 +02:00
/ * the json has the following form :
{
" version " : " #[version]# " ,
" uptime " : " #[uptime]# " ,
" name " : " #[name]# " ,
" rowdef " : " #[rowdef]# " ,
" idx " : {
# { list } # " #[term]# " : [ # [ references ] # ] # ( comma ) # : : , # ( / comma ) #
# { / list } #
}
}
* /
2011-06-13 23:44:03 +02:00
final JSONObject idx = json . getJSONObject ( " idx " ) ;
2011-05-18 16:26:28 +02:00
// iterate over all references
2011-06-13 23:44:03 +02:00
final Iterator < String > termIterator = idx . keys ( ) ;
2011-05-18 16:26:28 +02:00
String term ;
2011-12-06 02:24:51 +01:00
while ( termIterator . hasNext ( ) ) {
2011-05-18 16:26:28 +02:00
term = termIterator . next ( ) ;
2011-06-13 23:44:03 +02:00
final JSONArray references = idx . getJSONArray ( term ) ;
2011-05-18 16:26:28 +02:00
// iterate until we get an exception or null
int c = 0 ;
String reference ;
2011-12-06 02:24:51 +01:00
final ReferenceContainer < HostReference > referenceContainer =
new ReferenceContainer < HostReference > (
WebStructureGraph . hostReferenceFactory ,
UTF8 . getBytes ( term ) ) ;
2011-05-18 16:26:28 +02:00
try {
2011-12-06 02:24:51 +01:00
while ( ( reference = references . getString ( c + + ) ) ! = null ) {
2011-05-18 16:26:28 +02:00
//System.out.println("REFERENCE: " + reference);
referenceContainer . add ( new HostReference ( reference ) ) ;
}
2011-12-06 02:24:51 +01:00
} catch ( final JSONException e ) {
} // this finishes the iteration
2011-05-18 16:26:28 +02:00
index . add ( referenceContainer ) ;
}
return index ;
2011-12-06 02:24:51 +01:00
} catch ( final Exception e ) {
2011-10-04 11:06:24 +02:00
Network . log . logWarning ( " yacyClient.loadIDXHosts error: " + e . getMessage ( ) ) ;
2011-05-18 16:26:28 +02:00
return index ;
}
}
2008-08-02 14:12:04 +02:00
public static void main ( final String [ ] args ) {
2011-12-06 02:24:51 +01:00
if ( args . length > 2 ) {
2010-09-14 15:35:47 +02:00
// search a remote peer. arguments:
// first arg: path to application home
// second arg: address of target peer
// third arg: search word or file name with list of search words
2011-01-28 11:54:13 +01:00
// i.e. /Data/workspace1/yacy/ localhost:8090 /Data/workspace1/yacy/test/words/searchtest.words
2010-09-14 15:35:47 +02:00
System . out . println ( " yacyClient Test " ) ;
2011-12-06 02:24:51 +01:00
final File searchwordfile = new File ( args [ 2 ] ) ;
final List < String > searchlines = new ArrayList < String > ( ) ;
if ( searchwordfile . exists ( ) ) {
Iterator < String > i ;
try {
i = FileUtils . strings ( FileUtils . read ( searchwordfile ) ) ;
while ( i . hasNext ( ) ) {
searchlines . add ( i . next ( ) ) ;
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
} catch ( final IOException e ) {
e . printStackTrace ( ) ;
System . exit ( - 1 ) ;
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
} else {
searchlines . add ( args [ 2 ] ) ;
}
for ( final String line : searchlines ) {
final byte [ ] wordhashe =
ASCII . getBytes ( QueryParams . hashSet2hashString ( Word . words2hashesHandles ( QueryParams
. cleanQuery ( line ) [ 0 ] ) ) ) ;
final long time = System . currentTimeMillis ( ) ;
SearchResult result ;
try {
result =
new SearchResult ( basicRequestParts ( ( String ) null , ( String ) null , " freeworld " ) , null , // sb.peers.mySeed(),
ASCII . String ( wordhashe ) ,
" " , // excludehashes,
" " , // urlhashes,
QueryParams . matchnothing_pattern , // prefer,
QueryParams . catchall_pattern , // filter,
" " , // modifier
" " , // language,
" " , // sitehash,
" " , // authorhash,
2012-04-22 02:05:17 +02:00
" all " , // contentdom,
2011-12-06 02:24:51 +01:00
10 , // count,
3000 , // time,
1000 , // maxDistance,
true , //global,
16 , // partitions,
" " ,
args [ 1 ] ,
null , //secondarySearchSuperviser,
2012-04-22 00:04:36 +02:00
new RankingProfile ( Classification . ContentDomain . TEXT ) , // rankingProfile,
2011-12-06 02:24:51 +01:00
null // constraint);
2010-09-14 15:35:47 +02:00
) ;
2011-12-06 02:24:51 +01:00
for ( final URIMetadataRow link : result . links ) {
2011-12-17 01:27:08 +01:00
System . out . println ( link . url ( ) . toNormalform ( true , false ) ) ;
2011-12-06 02:24:51 +01:00
System . out . println ( link . snippet ( ) ) ;
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
} catch ( final IOException e ) {
// TODO Auto-generated catch block
e . printStackTrace ( ) ;
2010-09-14 15:35:47 +02:00
}
2011-12-06 02:24:51 +01:00
System . out . println ( " Search Time: " + ( System . currentTimeMillis ( ) - time ) ) ;
}
2010-09-14 15:35:47 +02:00
System . exit ( 0 ) ;
2011-12-06 02:24:51 +01:00
} else if ( args . length = = 1 ) {
2008-05-04 12:53:04 +02:00
System . out . println ( " wput Test " ) ;
// connection params
2010-08-23 03:08:56 +02:00
MultiProtocolURI url = null ;
2008-05-04 12:53:04 +02:00
try {
2010-08-23 03:08:56 +02:00
url = new MultiProtocolURI ( args [ 0 ] ) ;
2011-12-06 02:24:51 +01:00
} catch ( final MalformedURLException e ) {
2009-11-05 21:28:37 +01:00
Log . logException ( e ) ;
2008-05-04 12:53:04 +02:00
}
2011-12-06 02:24:51 +01:00
if ( url = = null ) {
2008-05-04 12:53:04 +02:00
System . exit ( 1 ) ;
return ;
}
final String vhost = url . getHost ( ) ;
final int timeout = 10000 ;
2010-07-13 01:07:05 +02:00
// new data
2011-12-06 02:24:51 +01:00
final Map < String , ContentBody > newpost = new LinkedHashMap < String , ContentBody > ( ) ;
2011-02-25 14:26:09 +01:00
newpost . put ( " process " , UTF8 . StringBody ( " permission " ) ) ;
newpost . put ( " purpose " , UTF8 . StringBody ( " crcon " ) ) ;
2011-12-06 02:24:51 +01:00
byte [ ] res ;
try {
// res = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(url, timeout, vhost, newpost, true);
final HTTPClient httpClient = new HTTPClient ( ClientIdentification . getUserAgent ( ) , timeout ) ;
res = httpClient . POSTbytes ( url , vhost , newpost , true ) ;
System . out . println ( UTF8 . String ( res ) ) ;
} catch ( final IOException e1 ) {
Log . logException ( e1 ) ;
}
}
try {
net . yacy . cora . protocol . http . HTTPClient . closeConnectionManager ( ) ;
} catch ( final InterruptedException e ) {
Log . logException ( e ) ;
2008-05-04 12:53:04 +02:00
}
2005-04-07 21:19:42 +02:00
}
2005-09-21 23:32:43 +02:00
2011-10-04 11:06:24 +02:00
public static final boolean authentifyRequest ( final serverObjects post , final serverSwitch env ) {
2011-12-06 02:24:51 +01:00
if ( post = = null | | env = = null ) {
return false ;
}
2011-10-04 11:06:24 +02:00
// identify network
final String unitName = post . get ( SwitchboardConstants . NETWORK_NAME , Seed . DFLT_NETWORK_UNIT ) ; // the network unit
2011-12-06 02:24:51 +01:00
if ( ! unitName . equals ( env . getConfig ( SwitchboardConstants . NETWORK_NAME , Seed . DFLT_NETWORK_UNIT ) ) ) {
2011-10-04 11:06:24 +02:00
return false ;
}
// check authentication method
final String authenticationControl = env . getConfig ( " network.unit.protocol.control " , " uncontrolled " ) ;
2011-12-06 02:24:51 +01:00
if ( authenticationControl . equals ( " uncontrolled " ) ) {
return true ;
}
final String authenticationMethod =
env . getConfig ( " network.unit.protocol.request.authentication.method " , " " ) ;
2012-07-10 22:59:03 +02:00
if ( authenticationMethod . isEmpty ( ) ) {
2011-10-04 11:06:24 +02:00
return false ;
}
2011-12-06 02:24:51 +01:00
if ( authenticationMethod . equals ( " salted-magic-sim " ) ) {
2011-10-04 11:06:24 +02:00
// authorize the peer using the md5-magic
final String salt = post . get ( " key " , " " ) ;
final String iam = post . get ( " iam " , " " ) ;
final String magic = env . getConfig ( " network.unit.protocol.request.authentication.essentials " , " " ) ;
final String md5 = Digest . encodeMD5Hex ( salt + iam + magic ) ;
return post . get ( " magicmd5 " , " " ) . equals ( md5 ) ;
}
// unknown authentication method
return false ;
}
2011-12-06 02:24:51 +01:00
public static final LinkedHashMap < String , ContentBody > basicRequestParts (
final Switchboard sb ,
final String targetHash ,
final String salt ) {
2011-10-04 11:06:24 +02:00
// put in all the essentials for routing and network authentication
// generate a session key
2011-12-06 02:24:51 +01:00
final LinkedHashMap < String , ContentBody > parts =
basicRequestParts (
sb . peers . mySeed ( ) . hash ,
targetHash ,
Switchboard . getSwitchboard ( ) . getConfig (
SwitchboardConstants . NETWORK_NAME ,
Seed . DFLT_NETWORK_UNIT ) ) ;
2011-10-04 11:06:24 +02:00
parts . put ( " key " , UTF8 . StringBody ( salt ) ) ;
// authentication essentials
final String authenticationControl = sb . getConfig ( " network.unit.protocol.control " , " uncontrolled " ) ;
2011-12-06 02:24:51 +01:00
final String authenticationMethod =
sb . getConfig ( " network.unit.protocol.request.authentication.method " , " " ) ;
if ( ( authenticationControl . equals ( " controlled " ) ) & & ( authenticationMethod . length ( ) > 0 ) ) {
if ( authenticationMethod . equals ( " salted-magic-sim " ) ) {
2011-10-04 11:06:24 +02:00
// generate an authentication essential using the salt, the iam-hash and the network magic
2011-12-06 02:24:51 +01:00
final String magic =
sb . getConfig ( " network.unit.protocol.request.authentication.essentials " , " " ) ;
2011-10-04 11:06:24 +02:00
final String md5 = Digest . encodeMD5Hex ( salt + sb . peers . mySeed ( ) . hash + magic ) ;
parts . put ( " magicmd5 " , UTF8 . StringBody ( md5 ) ) ;
}
}
return parts ;
}
2011-12-06 02:24:51 +01:00
public static final LinkedHashMap < String , ContentBody > basicRequestParts (
final String myHash ,
final String targetHash ,
final String networkName ) {
2011-10-04 11:06:24 +02:00
// put in all the essentials for routing and network authentication
// generate a session key
2011-12-06 02:24:51 +01:00
final LinkedHashMap < String , ContentBody > parts = new LinkedHashMap < String , ContentBody > ( ) ;
2011-10-04 11:06:24 +02:00
// just standard identification essentials
2011-12-06 02:24:51 +01:00
if ( myHash ! = null ) {
2011-10-04 11:06:24 +02:00
parts . put ( " iam " , UTF8 . StringBody ( myHash ) ) ;
2011-12-06 02:24:51 +01:00
if ( targetHash ! = null ) {
parts . put ( " youare " , UTF8 . StringBody ( targetHash ) ) ;
}
2011-10-04 11:06:24 +02:00
// time information for synchronization
// use our own formatter to prevent concurrency locks with other processes
2011-12-06 02:24:51 +01:00
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter ( GenericFormatter . FORMAT_SHORT_SECOND , GenericFormatter . time_second ) ;
2011-10-04 11:06:24 +02:00
parts . put ( " mytime " , UTF8 . StringBody ( my_SHORT_SECOND_FORMATTER . format ( ) ) ) ;
parts . put ( " myUTC " , UTF8 . StringBody ( Long . toString ( System . currentTimeMillis ( ) ) ) ) ;
// network identification
parts . put ( SwitchboardConstants . NETWORK_NAME , UTF8 . StringBody ( networkName ) ) ;
}
return parts ;
}
2005-04-07 21:19:42 +02:00
}