2005-04-07 21:19:42 +02:00
// transferRWI.java
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
2005-10-04 19:51:32 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
2005-05-12 19:50:45 +02:00
// You must compile this file with
2005-04-07 21:19:42 +02:00
// javac -classpath .:../classes transferRWI.java
2005-10-05 12:45:33 +02:00
import java.util.ArrayList ;
2005-05-05 07:36:42 +02:00
import java.util.HashSet ;
import java.util.Iterator ;
import de.anomic.http.httpHeader ;
import de.anomic.plasma.plasmaSwitchboard ;
import de.anomic.plasma.plasmaWordIndexEntry ;
2005-05-07 23:11:18 +02:00
import de.anomic.plasma.plasmaWordIndexEntryContainer ;
2005-05-05 07:36:42 +02:00
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
import de.anomic.yacy.yacyCore ;
2005-08-16 02:31:15 +02:00
import de.anomic.yacy.yacySeed ;
2005-08-14 02:57:30 +02:00
import de.anomic.yacy.yacyDHTAction ;
2005-04-07 21:19:42 +02:00
2005-10-05 12:45:33 +02:00
public final class transferRWI {
2005-04-07 21:19:42 +02:00
2005-10-04 19:51:32 +02:00
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch ss ) {
2005-10-07 17:04:03 +02:00
if ( post = = null | | ss = = null ) { return null ; }
2005-10-04 19:51:32 +02:00
2005-10-05 12:45:33 +02:00
long start = System . currentTimeMillis ( ) ;
2005-10-07 17:04:03 +02:00
// return variable that accumulates replacements
2005-10-04 19:51:32 +02:00
final plasmaSwitchboard sb = ( plasmaSwitchboard ) ss ;
2005-10-07 17:04:03 +02:00
final serverObjects prop = new serverObjects ( ) ;
if ( prop = = null | | sb = = null ) { return null ; }
// request values
2005-10-04 19:51:32 +02:00
final String iam = ( String ) post . get ( " iam " , " " ) ; // seed hash of requester
// final String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
// final String key = (String) post.get("key", ""); // transmission key
final int wordc = Integer . parseInt ( ( String ) post . get ( " wordc " , " " ) ) ; // number of different words
final int entryc = Integer . parseInt ( ( String ) post . get ( " entryc " , " " ) ) ; // number of entries in indexes
final byte [ ] indexes = ( ( String ) post . get ( " indexes " , " " ) ) . getBytes ( ) ; // the indexes, as list of word entries
final boolean granted = sb . getConfig ( " allowReceiveIndex " , " false " ) . equals ( " true " ) ;
2005-10-07 17:04:03 +02:00
2005-04-07 21:19:42 +02:00
// response values
String result = " " ;
2005-10-04 19:51:32 +02:00
StringBuffer unknownURLs = new StringBuffer ( ) ;
2005-10-07 17:04:03 +02:00
2005-10-04 19:51:32 +02:00
final yacySeed otherPeer = yacyCore . seedDB . get ( iam ) ;
final String otherPeerName = iam + " : " + ( ( otherPeer = = null ) ? " NULL " : ( otherPeer . getName ( ) + " / " + otherPeer . getVersion ( ) ) ) ;
2005-10-05 12:45:33 +02:00
2005-04-07 21:19:42 +02:00
if ( granted ) {
2005-10-04 02:28:59 +02:00
// log value status (currently added to find outOfMemory error
2005-10-04 19:51:32 +02:00
sb . getLog ( ) . logFine ( " Processing " + indexes . length + " bytes / " + wordc + " words / " + entryc + " entries from " + otherPeerName ) ;
final long startProcess = System . currentTimeMillis ( ) ;
2005-10-07 17:04:03 +02:00
2005-04-07 21:19:42 +02:00
// decode request
2005-10-05 12:45:33 +02:00
ArrayList v = new ArrayList ( ) ;
2005-04-07 21:19:42 +02:00
int s = 0 ;
int e ;
while ( s < indexes . length ) {
e = s ; while ( e < indexes . length ) if ( indexes [ e + + ] < 32 ) { e - - ; break ; }
if ( ( e - s ) > 0 ) v . add ( new String ( indexes , s , e - s ) ) ;
s = e ; while ( s < indexes . length ) if ( indexes [ s + + ] > = 32 ) { s - - ; break ; }
}
// the value-vector should now have the same length as entryc
2005-10-04 19:51:32 +02:00
if ( v . size ( ) ! = entryc ) sb . getLog ( ) . logSevere ( " ERROR WITH ENTRY COUNTER: v= " + v . size ( ) + " , entryc= " + entryc ) ;
2005-10-07 17:04:03 +02:00
2005-04-07 21:19:42 +02:00
// now parse the Strings in the value-vector and write index entries
String estring ;
int p ;
String wordHash ;
String urlHash ;
plasmaWordIndexEntry entry ;
2005-10-04 19:51:32 +02:00
final HashSet unknownURL = new HashSet ( ) ;
2005-04-07 21:19:42 +02:00
String [ ] wordhashes = new String [ v . size ( ) ] ;
int received = 0 ;
for ( int i = 0 ; i < v . size ( ) ; i + + ) {
2005-10-05 12:45:33 +02:00
estring = ( String ) v . get ( i ) ;
2005-04-07 21:19:42 +02:00
p = estring . indexOf ( " { " ) ;
if ( p > 0 ) {
wordHash = estring . substring ( 0 , p ) ;
wordhashes [ i ] = wordHash ;
entry = new plasmaWordIndexEntry ( estring . substring ( p ) ) ;
2005-10-10 11:28:28 +02:00
sb . wordIndex . addEntries ( plasmaWordIndexEntryContainer . instantContainer ( wordHash , System . currentTimeMillis ( ) , entry ) , true ) ;
2005-05-07 23:11:18 +02:00
urlHash = entry . getUrlHash ( ) ;
if ( ( ! ( unknownURL . contains ( urlHash ) ) ) & &
2005-10-04 19:51:32 +02:00
( ! ( sb . urlPool . loadedURL . exists ( urlHash ) ) ) ) {
2005-05-07 23:11:18 +02:00
unknownURL . add ( urlHash ) ;
2005-04-07 21:19:42 +02:00
}
2005-05-07 23:11:18 +02:00
received + + ;
2005-04-07 21:19:42 +02:00
}
}
yacyCore . seedDB . mySeed . incRI ( received ) ;
2005-10-07 17:04:03 +02:00
2005-04-07 21:19:42 +02:00
// finally compose the unknownURL hash list
2005-10-04 19:51:32 +02:00
final Iterator it = unknownURL . iterator ( ) ;
while ( it . hasNext ( ) ) {
unknownURLs . append ( " , " ) . append ( ( String ) it . next ( ) ) ;
}
if ( unknownURLs . length ( ) > 0 ) { unknownURLs . delete ( 0 , 1 ) ; }
if ( wordhashes . length = = 0 ) {
sb . getLog ( ) . logInfo ( " Received 0 RWIs from " + otherPeerName + " , processed in " + ( System . currentTimeMillis ( ) - startProcess ) + " milliseconds, requesting " + unknownURL . size ( ) + " URLs " ) ;
} else {
final double avdist = ( yacyDHTAction . dhtDistance ( yacyCore . seedDB . mySeed . hash , wordhashes [ 0 ] ) + yacyDHTAction . dhtDistance ( yacyCore . seedDB . mySeed . hash , wordhashes [ wordhashes . length - 1 ] ) ) / 2 . 0 ;
sb . getLog ( ) . logInfo ( " Received " + received + " Words [ " + wordhashes [ 0 ] + " .. " + wordhashes [ wordhashes . length - 1 ] + " ]/ " + avdist + " from " + otherPeerName + " , processed in " + ( System . currentTimeMillis ( ) - startProcess ) + " milliseconds, requesting " + unknownURL . size ( ) + " URLs " ) ;
2005-08-14 02:57:30 +02:00
}
2005-04-07 21:19:42 +02:00
result = " ok " ;
} else {
2005-10-04 19:51:32 +02:00
sb . getLog ( ) . logInfo ( " Rejecting RWIs from peer " + otherPeerName + " . Not granted. " ) ;
2005-04-07 21:19:42 +02:00
result = " error_not_granted " ;
}
2005-10-07 17:04:03 +02:00
2005-10-04 19:51:32 +02:00
prop . put ( " unknownURL " , unknownURLs . toString ( ) ) ;
2005-04-07 21:19:42 +02:00
prop . put ( " result " , result ) ;
2005-10-07 17:04:03 +02:00
// return rewrite properties
return prop ;
2005-04-07 21:19:42 +02:00
}
2005-10-07 17:04:03 +02:00
}