2005-10-05 15:16:53 +02:00
// transferURL.java
2005-04-07 21:19:42 +02:00
// -----------------------
// part of the AnomicHTTPD caching proxy
2008-07-20 19:14:51 +02:00
// (C) by Michael Peter Christen; mc@yacy.net
2005-04-07 21:19:42 +02:00
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
2005-10-05 15:16:53 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2005-05-12 19:50:45 +02:00
// You must compile this file with
2005-04-07 21:19:42 +02:00
// javac -classpath .:../classes transferURL.java
2006-09-07 03:13:03 +02:00
import java.io.IOException ;
2006-12-08 03:14:56 +01:00
import java.text.ParseException ;
2013-05-17 13:59:37 +02:00
import java.util.HashMap ;
import java.util.Map ;
import java.util.Set ;
2006-09-07 03:13:03 +02:00
2011-01-03 21:52:54 +01:00
import net.yacy.cora.date.GenericFormatter ;
2013-09-15 00:30:23 +02:00
import net.yacy.cora.document.encoding.ASCII ;
import net.yacy.cora.document.feed.RSSMessage ;
2010-08-23 14:32:02 +02:00
import net.yacy.cora.protocol.RequestHeader ;
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2012-09-21 15:48:16 +02:00
import net.yacy.crawler.data.ResultURLs ;
import net.yacy.crawler.data.ResultURLs.EventOrigin ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.meta.URIMetadataRow ;
2011-10-04 11:06:24 +02:00
import net.yacy.peers.EventChannel ;
import net.yacy.peers.Network ;
2011-12-17 01:27:08 +01:00
import net.yacy.peers.Protocol ;
import net.yacy.peers.Seed ;
2012-06-11 00:17:30 +02:00
import net.yacy.repository.Blacklist.BlacklistType ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
2013-11-13 13:38:01 +01:00
import net.yacy.search.SwitchboardConstants ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
2005-04-07 21:19:42 +02:00
2005-10-05 12:45:33 +02:00
public final class transferURL {
2011-10-04 11:06:24 +02:00
2013-12-23 01:23:40 +01:00
public static serverObjects respond ( @SuppressWarnings ( " unused " ) final RequestHeader header , final serverObjects post , final serverSwitch env ) {
2008-08-02 14:12:04 +02:00
final long start = System . currentTimeMillis ( ) ;
2006-12-08 03:14:56 +01:00
long freshdate = 0 ;
2011-01-03 21:52:54 +01:00
try { freshdate = GenericFormatter . SHORT_DAY_FORMATTER . parse ( " 20061101 " ) . getTime ( ) ; } catch ( final ParseException e1 ) { }
2011-10-04 11:06:24 +02:00
2008-03-12 01:05:18 +01:00
// return variable that accumulates replacements
2009-07-19 22:37:44 +02:00
final Switchboard sb = ( Switchboard ) env ;
2008-03-12 01:05:18 +01:00
final serverObjects prop = new serverObjects ( ) ;
if ( ( post = = null ) | | ( env = = null ) ) return prop ;
2011-10-04 11:06:24 +02:00
if ( ! Protocol . authentifyRequest ( post , env ) ) return prop ;
2005-10-05 15:16:53 +02:00
// request values
2008-03-12 01:05:18 +01:00
final String iam = post . get ( " iam " , " " ) ; // seed hash of requester
final String youare = post . get ( " youare " , " " ) ; // seed hash of the target peer, needed for network stability
// final String key = post.get("key", ""); // transmission key
final int urlc = post . getInt ( " urlc " , 0 ) ; // number of transported urls
2011-03-15 02:03:35 +01:00
final boolean granted = sb . getConfigBool ( " allowReceiveIndex " , false ) ;
2013-11-13 13:38:01 +01:00
final boolean blockBlacklist = sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_BLOCK_BLACKLIST , false ) ;
2005-07-11 17:36:10 +02:00
2005-04-07 21:19:42 +02:00
// response values
String result = " " ;
String doublevalues = " 0 " ;
2005-10-05 15:16:53 +02:00
2011-10-04 11:06:24 +02:00
final Seed otherPeer = sb . peers . get ( iam ) ;
2008-03-12 01:05:18 +01:00
final String otherPeerName = iam + " : " + ( ( otherPeer = = null ) ? " NULL " : ( otherPeer . getName ( ) + " / " + otherPeer . getVersion ( ) ) ) ;
2009-05-28 16:26:05 +02:00
if ( ( youare = = null ) | | ( ! youare . equals ( sb . peers . mySeed ( ) . hash ) ) ) {
2013-07-09 14:28:25 +02:00
Network . log . info ( " Rejecting URLs from peer " + otherPeerName + " . Wrong target. Wanted peer= " + youare + " , iam= " + sb . peers . mySeed ( ) . hash ) ;
2007-05-01 01:21:13 +02:00
result = " wrong_target " ;
} else if ( ( ! granted ) | | ( sb . isRobinsonMode ( ) ) ) {
2013-07-09 14:28:25 +02:00
Network . log . info ( " Rejecting URLs from peer " + otherPeerName + " . Not granted. " ) ;
2007-04-24 17:11:12 +02:00
result = " error_not_granted " ;
} else {
2005-04-07 21:19:42 +02:00
int received = 0 ;
2006-08-07 13:42:00 +02:00
int blocked = 0 ;
2012-08-18 13:18:51 +02:00
int doublecheck = 0 ;
2005-04-07 21:19:42 +02:00
// read the urls from the other properties and store
String urls ;
2012-10-17 17:45:41 +02:00
URIMetadataRow lEntry ;
2013-05-17 13:59:37 +02:00
Map < String , URIMetadataRow > lEm = new HashMap < String , URIMetadataRow > ( ) ;
2005-04-07 21:19:42 +02:00
for ( int i = 0 ; i < urlc ; i + + ) {
2011-10-04 11:06:24 +02:00
2006-12-08 03:14:56 +01:00
// read new lurl-entry
2008-06-06 18:01:27 +02:00
urls = post . get ( " url " + i ) ;
2005-07-11 17:36:10 +02:00
if ( urls = = null ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " transferURL: got null URL-string from peer " + otherPeerName ) ;
2007-07-24 02:46:17 +02:00
blocked + + ;
2006-12-08 03:14:56 +01:00
continue ;
}
// parse new lurl-entry
2009-10-11 02:12:19 +02:00
lEntry = URIMetadataRow . importEntry ( urls ) ;
2006-12-08 03:14:56 +01:00
if ( lEntry = = null ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isWarn ( ) ) Network . log . warn ( " transferURL: received invalid URL (entry null) from peer " + otherPeerName + " \ n \ tURL Property: " + urls ) ;
2007-07-24 02:46:17 +02:00
blocked + + ;
2006-12-08 03:14:56 +01:00
continue ;
}
2011-10-04 11:06:24 +02:00
2006-12-08 03:14:56 +01:00
// check if entry is well-formed
2011-12-17 01:27:08 +01:00
if ( lEntry . url ( ) = = null ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isWarn ( ) ) Network . log . warn ( " transferURL: received invalid URL from peer " + otherPeerName + " \ n \ tURL Property: " + urls ) ;
2007-07-24 02:46:17 +02:00
blocked + + ;
2006-12-08 03:14:56 +01:00
continue ;
}
2011-10-04 11:06:24 +02:00
2007-01-17 06:31:27 +01:00
// check whether entry is too old
if ( lEntry . freshdate ( ) . getTime ( ) < = freshdate ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " transerURL: received too old URL from peer " + otherPeerName + " : " + lEntry . freshdate ( ) ) ;
2007-07-24 02:46:17 +02:00
blocked + + ;
2007-01-17 06:31:27 +01:00
continue ;
}
2011-10-04 11:06:24 +02:00
2013-05-26 03:24:32 +02:00
// check if the entry is blacklisted
2012-06-11 00:17:30 +02:00
if ( ( blockBlacklist ) & & ( Switchboard . urlBlacklist . isListed ( BlacklistType . DHT , lEntry ) ) ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " transferURL: blocked blacklisted URL ' " + lEntry . url ( ) . toNormalform ( false ) + " ' from peer " + otherPeerName ) ;
2006-12-08 03:14:56 +01:00
lEntry = null ;
blocked + + ;
continue ;
}
2011-10-04 11:06:24 +02:00
2007-07-24 02:46:17 +02:00
// check if the entry is in our network domain
2011-12-17 01:27:08 +01:00
final String urlRejectReason = sb . crawlStacker . urlInAcceptedDomain ( lEntry . url ( ) ) ;
2008-04-20 23:36:25 +02:00
if ( urlRejectReason ! = null ) {
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " transferURL: blocked URL ' " + lEntry . url ( ) + " ' ( " + urlRejectReason + " ) from peer " + otherPeerName ) ;
2007-07-24 02:46:17 +02:00
lEntry = null ;
blocked + + ;
continue ;
}
2011-10-04 11:06:24 +02:00
2013-05-17 13:59:37 +02:00
lEm . put ( ASCII . String ( lEntry . hash ( ) ) , lEntry ) ;
}
2013-05-26 03:24:32 +02:00
Set < String > doubles = sb . index . exists ( lEm . keySet ( ) ) ;
doublecheck = doubles . size ( ) ;
for ( String id : lEm . keySet ( ) ) {
if ( ! doubles . contains ( id ) ) {
lEntry = lEm . get ( id ) ;
// write entry to database
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " Accepting URL from peer " + otherPeerName + " : " + lEntry . url ( ) . toNormalform ( true ) ) ;
2013-05-26 03:24:32 +02:00
try {
2013-05-29 18:27:27 +02:00
sb . index . fulltext ( ) . putMetadata ( lEntry ) ;
2013-05-26 03:24:32 +02:00
ResultURLs . stack ( ASCII . String ( lEntry . url ( ) . hash ( ) ) , lEntry . url ( ) . getHost ( ) , iam . getBytes ( ) , iam . getBytes ( ) , EventOrigin . DHT_TRANSFER ) ;
2013-07-09 14:28:25 +02:00
if ( Network . log . isFine ( ) ) Network . log . fine ( " transferURL: received URL ' " + lEntry . url ( ) . toNormalform ( false ) + " ' from peer " + otherPeerName ) ;
2013-05-26 03:24:32 +02:00
received + + ;
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2013-05-26 03:24:32 +02:00
}
2005-04-07 21:19:42 +02:00
}
}
2005-10-05 15:16:53 +02:00
2009-05-28 16:26:05 +02:00
sb . peers . mySeed ( ) . incRU ( received ) ;
2005-10-05 15:16:53 +02:00
2005-04-07 21:19:42 +02:00
// return rewrite properties
2013-07-09 14:28:25 +02:00
Network . log . info ( " Received " + received + " URLs from peer " + otherPeerName + " in " + ( System . currentTimeMillis ( ) - start ) + " ms, blocked " + blocked + " URLs " ) ;
2011-10-04 11:06:24 +02:00
EventChannel . channels ( EventChannel . DHTRECEIVE ) . addMessage ( new RSSMessage ( " Received " + received + " , blocked " + blocked + " URLs from peer " + otherPeerName , " " , otherPeer . hash ) ) ;
2012-08-18 13:18:51 +02:00
if ( doublecheck > 0 ) {
2013-07-09 14:28:25 +02:00
Network . log . warn ( " Received " + doublecheck + " / " + urlc + " double URLs from peer " + otherPeerName ) ; // double should not happen because we demanded only documents which we do not have yet
2012-08-18 13:18:51 +02:00
doublevalues = Integer . toString ( doublecheck ) ;
}
2005-04-07 21:19:42 +02:00
result = " ok " ;
}
2005-10-05 15:16:53 +02:00
2007-10-24 23:38:19 +02:00
prop . put ( " double " , doublevalues ) ;
prop . put ( " result " , result ) ;
2005-10-05 15:16:53 +02:00
return prop ;
2005-04-07 21:19:42 +02:00
}
2006-03-17 00:58:44 +01:00
}