2009-08-24 17:24:02 +02:00
// SearchEvent.java
2007-08-06 02:56:56 +02:00
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 10.10.2005 on http://yacy.net
2005-10-10 02:32:15 +02:00
//
2007-08-06 02:56:56 +02:00
// This is a part of YaCy, a peer-to-peer based web search engine
//
2009-09-05 22:41:21 +02:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2007-08-06 02:56:56 +02:00
//
// LICENSE
//
2005-10-10 02:32:15 +02:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2009-07-09 00:14:57 +02:00
package de.anomic.search ;
2005-10-10 02:32:15 +02:00
2009-08-26 17:59:55 +02:00
import java.io.IOException ;
2007-08-26 20:18:35 +02:00
import java.util.ArrayList ;
2005-10-11 09:06:33 +02:00
import java.util.Iterator ;
2006-09-11 00:36:47 +02:00
import java.util.Map ;
2006-09-11 12:39:25 +02:00
import java.util.TreeMap ;
2009-08-26 17:59:55 +02:00
import java.util.TreeSet ;
2005-10-12 14:28:49 +02:00
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.word.WordReference ;
2009-10-10 01:13:30 +02:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Base64Order ;
2009-10-10 02:39:15 +02:00
import net.yacy.kelondro.rwi.ReferenceContainer ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.MemoryControl ;
import net.yacy.kelondro.util.SetTools ;
2009-10-10 01:13:30 +02:00
2008-05-06 02:32:41 +02:00
import de.anomic.crawler.ResultURLs ;
2009-07-09 00:14:57 +02:00
import de.anomic.search.RankingProcess.NavigatorEntry ;
2007-11-17 02:53:02 +01:00
import de.anomic.server.serverProfiling ;
2005-10-13 15:57:15 +02:00
import de.anomic.yacy.yacySearch ;
2009-05-28 16:26:05 +02:00
import de.anomic.yacy.yacySeedDB ;
/*
 * NOTE (from revision history, git-svn r5586): replaced old DHT transmission
 * method with a new method. Many things have changed, among them:
 * - after an index selection is made, the index is split into its vertical components
 * - from different index selections the split components can be accumulated before
 *   they are placed into the transmission queue
 * - each split chunk gets its own transmission thread
 * - multiple transmission threads are started concurrently
 * - the process can be monitored with the blocking queue servlet
 * To implement that, a new package de.anomic.yacy.dht was created and some old files
 * were removed. The new index distribution model using a vertical DHT was implemented;
 * an abstraction of this model is provided in the new dht package as an interface.
 * The freeworld network now has a configuration of two vertical partitions; sixteen
 * partitions are planned and will be configured once the process is bug-free.
 * This modification has three main targets:
 * - enhance the DHT transmission speed
 * - with a vertical DHT, a search will speed up (two partitions: two times; sixteen: sixteen times)
 * - the vertical DHT applies a semi-DHT for URLs, so peers receive only a fraction of
 *   the overall URLs they received before (1/2 with two partitions, 1/16 with sixteen)
 * BE CAREFUL: THIS WAS A MAJOR CODE CHANGE, POSSIBLY FULL OF BUGS AND HARMFUL THINGS.
 * git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5586 6c8d7289-2bf4-0310-a012-ef5d649a1542
 */
2009-02-10 01:06:59 +01:00
import de.anomic.yacy.dht.FlatWordPartitionScheme ;
2009-07-19 22:37:44 +02:00
import de.anomic.ymage.ProfilingGraph ;
2005-10-10 02:32:15 +02:00
2009-08-24 17:24:02 +02:00
public final class SearchEvent {
2005-10-10 02:32:15 +02:00
2008-03-23 01:55:04 +01:00
public static final String INITIALIZATION = " initialization " ;
2007-11-17 02:53:02 +01:00
public static final String COLLECTION = " collection " ;
public static final String JOIN = " join " ;
public static final String PRESORT = " presort " ;
public static final String URLFETCH = " urlfetch " ;
public static final String NORMALIZING = " normalizing " ;
2008-03-23 01:55:04 +01:00
public static final String FINALIZATION = " finalization " ;
2007-11-17 02:53:02 +01:00
2009-06-15 13:49:00 +02:00
private static final int max_results_preparation = 1000 ;
2007-08-25 01:12:59 +02:00
2009-08-26 17:59:55 +02:00
// class variables that may be implemented with an abstract class
private long eventTime ;
private QueryParams query ;
private final Segment indexSegment ;
2009-05-28 16:26:05 +02:00
private final yacySeedDB peers ;
2009-08-26 17:59:55 +02:00
private RankingProcess rankedCache ; // ordered search results, grows dynamically as all the query threads enrich this container
2009-08-30 12:28:23 +02:00
private ResultFetcher results ;
2009-08-26 17:59:55 +02:00
// class variables for search abstracts
2009-08-24 17:24:02 +02:00
private final IndexAbstracts rcAbstracts ; // cache for index abstracts; word:TreeMap mapping where the embedded TreeMap is a urlhash:peerlist relation
2009-08-26 17:59:55 +02:00
// class variables for remote searches
2006-09-13 19:13:28 +02:00
private yacySearch [ ] primarySearchThreads , secondarySearchThreads ;
2009-04-16 17:29:00 +02:00
private final TreeMap < byte [ ] , String > preselectedPeerHashes ;
2009-08-26 17:59:55 +02:00
private ResultURLs crawlResults ;
private Thread localSearchThread ;
private TreeMap < byte [ ] , String > IAResults ;
private TreeMap < byte [ ] , Integer > IACount ;
private byte [ ] IAmaxcounthash , IAneardhthash ;
2005-10-12 14:28:49 +02:00
2009-08-26 17:59:55 +02:00
@SuppressWarnings ( " unchecked " ) SearchEvent ( final QueryParams query ,
2009-05-28 16:26:05 +02:00
final Segment indexSegment ,
final yacySeedDB peers ,
2009-05-08 21:26:36 +02:00
final ResultURLs crawlResults ,
2009-04-16 17:29:00 +02:00
final TreeMap < byte [ ] , String > preselectedPeerHashes ,
2009-05-08 21:26:36 +02:00
final boolean generateAbstracts ) {
2007-08-25 01:12:59 +02:00
this . eventTime = System . currentTimeMillis ( ) ; // for lifetime check
2009-05-28 16:26:05 +02:00
this . indexSegment = indexSegment ;
this . peers = peers ;
2008-03-26 15:13:05 +01:00
this . crawlResults = crawlResults ;
2005-10-11 09:06:33 +02:00
this . query = query ;
2009-08-24 17:24:02 +02:00
this . rcAbstracts = ( query . queryHashes . size ( ) > 1 ) ? new IndexAbstracts ( ) : null ; // generate abstracts only for combined searches
2006-09-13 19:13:28 +02:00
this . primarySearchThreads = null ;
this . secondarySearchThreads = null ;
2007-04-26 11:51:51 +02:00
this . preselectedPeerHashes = preselectedPeerHashes ;
2009-04-16 17:29:00 +02:00
this . IAResults = new TreeMap < byte [ ] , String > ( Base64Order . enhancedCoder ) ;
this . IACount = new TreeMap < byte [ ] , Integer > ( Base64Order . enhancedCoder ) ;
2007-08-28 14:15:46 +02:00
this . IAmaxcounthash = null ;
this . IAneardhthash = null ;
2008-02-01 00:40:47 +01:00
this . localSearchThread = null ;
2007-08-25 01:12:59 +02:00
2008-08-02 14:12:04 +02:00
final long start = System . currentTimeMillis ( ) ;
2009-07-09 00:14:57 +02:00
if ( ( query . domType = = QueryParams . SEARCHDOM_GLOBALDHT ) | |
( query . domType = = QueryParams . SEARCHDOM_CLUSTERALL ) ) {
2009-09-04 01:09:53 +02:00
final int fetchpeers = 12 ;
2009-06-04 22:58:47 +02:00
// initialize a ranking process that is the target for data
// that is generated concurrently from local and global search threads
2009-09-04 01:09:53 +02:00
this . rankedCache = new RankingProcess ( indexSegment , query , max_results_preparation , fetchpeers + 1 ) ;
2007-09-04 01:43:55 +02:00
2009-08-24 17:24:02 +02:00
// start a local search concurrently
this . rankedCache . start ( ) ;
2009-06-04 22:58:47 +02:00
// start global searches
2008-08-02 14:12:04 +02:00
final long timer = System . currentTimeMillis ( ) ;
2009-01-31 00:33:47 +01:00
Log . logFine ( " SEARCH_EVENT " , " STARTING " + fetchpeers + " THREADS TO CATCH EACH " + query . displayResults ( ) + " URLs " ) ;
2009-09-02 15:41:56 +02:00
this . primarySearchThreads = ( query . queryHashes . size ( ) = = 0 ) ? null : yacySearch . primaryRemoteSearches (
2009-07-09 00:14:57 +02:00
QueryParams . hashSet2hashString ( query . queryHashes ) ,
QueryParams . hashSet2hashString ( query . excludeHashes ) ,
2007-08-25 01:12:59 +02:00
" " ,
query . prefer ,
query . urlMask ,
2009-06-09 11:07:52 +02:00
query . targetlang = = null ? " " : query . targetlang ,
query . sitehash = = null ? " " : query . sitehash ,
query . authorhash = = null ? " " : query . authorhash ,
2007-09-04 01:43:55 +02:00
query . displayResults ( ) ,
2007-08-25 01:12:59 +02:00
query . maxDistance ,
2009-05-28 16:26:05 +02:00
indexSegment ,
peers ,
2008-03-26 15:13:05 +01:00
crawlResults ,
rankedCache ,
2007-08-25 01:12:59 +02:00
rcAbstracts ,
fetchpeers ,
2009-07-19 22:37:44 +02:00
Switchboard . urlBlacklist ,
2008-01-08 21:12:31 +01:00
query . ranking ,
2007-08-25 01:12:59 +02:00
query . constraint ,
2009-07-09 00:14:57 +02:00
( query . domType = = QueryParams . SEARCHDOM_GLOBALDHT ) ? null : preselectedPeerHashes ) ;
2009-09-05 22:31:39 +02:00
if ( this . primarySearchThreads ! = null ) {
if ( this . primarySearchThreads . length > fetchpeers ) this . rankedCache . moreFeeders ( this . primarySearchThreads . length - fetchpeers ) ;
serverProfiling . update ( " SEARCH " , new ProfilingGraph . searchEvent ( query . id ( true ) , " remote search thread start " , this . primarySearchThreads . length , System . currentTimeMillis ( ) - timer ) , false ) ;
// finished searching
Log . logFine ( " SEARCH_EVENT " , " SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads . length + " PEERS: " + ( ( System . currentTimeMillis ( ) - start ) / 1000 ) + " seconds " ) ;
} else {
// no search since query is empty, user might have entered no data or filters have removed all search words
Log . logFine ( " SEARCH_EVENT " , " NO SEARCH STARTED DUE TO EMPTY SEARCH REQUEST. " ) ;
}
2009-09-18 11:19:52 +02:00
// start worker threads to fetch urls and snippets
this . results = new ResultFetcher ( rankedCache , query , indexSegment , peers , 10000 ) ;
2007-08-25 01:12:59 +02:00
} else {
2007-11-17 02:53:02 +01:00
// do a local search
2009-07-09 00:14:57 +02:00
this . rankedCache = new RankingProcess ( indexSegment , query , max_results_preparation , 2 ) ;
2009-08-27 17:19:48 +02:00
this . rankedCache . run ( ) ;
2009-05-28 16:26:05 +02:00
//CrawlSwitchboard.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
2007-08-25 01:12:59 +02:00
2007-08-28 14:15:46 +02:00
if ( generateAbstracts ) {
// compute index abstracts
2008-08-02 14:12:04 +02:00
final long timer = System . currentTimeMillis ( ) ;
2007-08-28 14:15:46 +02:00
int maxcount = - 1 ;
2008-11-03 01:27:23 +01:00
long mindhtdistance = Long . MAX_VALUE , l ;
2009-04-16 17:29:00 +02:00
byte [ ] wordhash ;
2009-06-03 10:49:54 +02:00
assert this . rankedCache . searchContainerMap ( ) ! = null ;
for ( Map . Entry < byte [ ] , ReferenceContainer < WordReference > > entry : this . rankedCache . searchContainerMap ( ) . entrySet ( ) ) {
2008-01-18 18:14:02 +01:00
wordhash = entry . getKey ( ) ;
2009-03-02 00:58:14 +01:00
final ReferenceContainer container = entry . getValue ( ) ;
2009-09-18 23:49:38 +02:00
assert ( Base64Order . enhancedCoder . equal ( container . getTermHash ( ) , wordhash ) ) : " container.getTermHash() = " + new String ( container . getTermHash ( ) ) + " , wordhash = " + new String ( wordhash ) ;
2007-08-28 14:15:46 +02:00
if ( container . size ( ) > maxcount ) {
IAmaxcounthash = wordhash ;
maxcount = container . size ( ) ;
}
2009-05-28 16:26:05 +02:00
l = FlatWordPartitionScheme . std . dhtDistance ( wordhash , null , peers . mySeed ( ) ) ;
2008-11-03 01:27:23 +01:00
if ( l < mindhtdistance ) {
2007-08-28 14:15:46 +02:00
// calculate the word hash that is closest to our dht position
2008-11-03 01:27:23 +01:00
mindhtdistance = l ;
2007-08-28 14:15:46 +02:00
IAneardhthash = wordhash ;
}
2008-08-06 21:43:12 +02:00
IACount . put ( wordhash , Integer . valueOf ( container . size ( ) ) ) ;
2009-03-02 00:58:14 +01:00
IAResults . put ( wordhash , ReferenceContainer . compressIndex ( container , null , 1000 ) . toString ( ) ) ;
2007-08-28 14:15:46 +02:00
}
2009-07-19 22:37:44 +02:00
serverProfiling . update ( " SEARCH " , new ProfilingGraph . searchEvent ( query . id ( true ) , " abstract generation " , this . rankedCache . searchContainerMap ( ) . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
2007-08-28 14:15:46 +02:00
}
2009-09-18 11:19:52 +02:00
// start worker threads to fetch urls and snippets
this . results = new ResultFetcher ( rankedCache , query , indexSegment , peers , 10 ) ;
2007-08-25 01:12:59 +02:00
}
2009-09-18 11:19:52 +02:00
2007-09-07 13:45:38 +02:00
// clean up events
2009-08-24 17:24:02 +02:00
SearchEventCache . cleanupEvents ( false ) ;
2009-07-19 22:37:44 +02:00
serverProfiling . update ( " SEARCH " , new ProfilingGraph . searchEvent ( query . id ( true ) , " event-cleanup " , 0 , 0 ) , false ) ;
2007-08-25 01:12:59 +02:00
// store this search to a cache so it can be re-used
2009-08-24 17:24:02 +02:00
if ( MemoryControl . available ( ) < 1024 * 1024 * 10 ) SearchEventCache . cleanupEvents ( true ) ;
2009-08-26 17:59:55 +02:00
SearchEventCache . put ( query . id ( false ) , this ) ;
2005-10-12 14:28:49 +02:00
}
2009-08-26 17:59:55 +02:00
public long getEventTime ( ) {
return this . eventTime ;
}
public void resetEventTime ( ) {
this . eventTime = System . currentTimeMillis ( ) ;
}
public QueryParams getQuery ( ) {
return this . query ;
}
public void setQuery ( QueryParams query ) {
this . query = query ;
2009-08-30 12:28:23 +02:00
this . results . query = query ;
2009-08-26 17:59:55 +02:00
}
public void cleanup ( ) {
// execute deletion of failed words
2009-08-30 12:28:23 +02:00
int rw = this . results . failedURLs . size ( ) ;
2009-08-26 17:59:55 +02:00
if ( rw > 0 ) {
final TreeSet < byte [ ] > removeWords = query . queryHashes ;
removeWords . addAll ( query . excludeHashes ) ;
try {
final Iterator < byte [ ] > j = removeWords . iterator ( ) ;
// remove the same url hashes for multiple words
while ( j . hasNext ( ) ) {
2009-08-30 12:28:23 +02:00
this . indexSegment . termIndex ( ) . remove ( j . next ( ) , this . results . failedURLs . keySet ( ) ) ;
2009-08-26 17:59:55 +02:00
}
} catch ( IOException e ) {
e . printStackTrace ( ) ;
}
Log . logInfo ( " SearchEvents " , " cleaning up event " + query . id ( true ) + " , removed " + rw + " URL references on " + removeWords . size ( ) + " words " ) ;
}
}
public Iterator < Map . Entry < byte [ ] , String > > abstractsString ( ) {
return this . IAResults . entrySet ( ) . iterator ( ) ;
}
public String abstractsString ( byte [ ] hash ) {
return this . IAResults . get ( hash ) ;
}
public Iterator < Map . Entry < byte [ ] , Integer > > abstractsCount ( ) {
return this . IACount . entrySet ( ) . iterator ( ) ;
}
public int abstractsCount ( byte [ ] hash ) {
Integer i = this . IACount . get ( hash ) ;
if ( i = = null ) return - 1 ;
return i . intValue ( ) ;
}
public byte [ ] getAbstractsMaxCountHash ( ) {
return this . IAmaxcounthash ;
}
public byte [ ] getAbstractsNearDHTHash ( ) {
return this . IAneardhthash ;
}
boolean anyRemoteSearchAlive ( ) {
2007-09-08 13:50:19 +02:00
// check primary search threads
if ( ( this . primarySearchThreads ! = null ) & & ( this . primarySearchThreads . length ! = 0 ) ) {
for ( int i = 0 ; i < this . primarySearchThreads . length ; i + + ) {
if ( ( this . primarySearchThreads [ i ] ! = null ) & & ( this . primarySearchThreads [ i ] . isAlive ( ) ) ) return true ;
}
}
2007-12-12 15:18:42 +01:00
// maybe a secondary search thread is alive, check this
2007-09-08 13:50:19 +02:00
if ( ( this . secondarySearchThreads ! = null ) & & ( this . secondarySearchThreads . length ! = 0 ) ) {
2007-10-04 10:50:33 +02:00
for ( int i = 0 ; i < this . secondarySearchThreads . length ; i + + ) {
2007-09-08 13:50:19 +02:00
if ( ( this . secondarySearchThreads [ i ] ! = null ) & & ( this . secondarySearchThreads [ i ] . isAlive ( ) ) ) return true ;
}
}
return false ;
}
2006-09-13 19:13:28 +02:00
public yacySearch [ ] getPrimarySearchThreads ( ) {
return primarySearchThreads ;
}
2007-11-07 23:38:09 +01:00
2006-09-13 19:13:28 +02:00
public yacySearch [ ] getSecondarySearchThreads ( ) {
return secondarySearchThreads ;
2005-10-24 02:34:15 +02:00
}
2009-07-09 00:14:57 +02:00
public RankingProcess getRankingResult ( ) {
2008-01-30 22:58:30 +01:00
return this . rankedCache ;
2007-01-15 02:50:57 +01:00
}
2007-09-04 01:43:55 +02:00
2009-06-02 17:20:10 +02:00
public ArrayList < NavigatorEntry > getHostNavigator ( int maxentries ) {
2009-06-04 01:49:06 +02:00
return this . rankedCache . getHostNavigator ( maxentries ) ;
}
public ArrayList < NavigatorEntry > getTopicNavigator ( final int maxentries ) {
// returns a set of words that are computed as toplist
return this . rankedCache . getTopicNavigator ( maxentries ) ;
2007-09-04 01:43:55 +02:00
}
2009-01-12 16:06:22 +01:00
2009-06-09 00:01:26 +02:00
public ArrayList < NavigatorEntry > getAuthorNavigator ( final int maxentries ) {
// returns a list of authors so far seen on result set
return this . rankedCache . getAuthorNavigator ( maxentries ) ;
}
2009-01-12 16:06:22 +01:00
public ResultEntry oneResult ( final int item ) {
2009-07-09 00:14:57 +02:00
if ( ( query . domType = = QueryParams . SEARCHDOM_GLOBALDHT ) | |
( query . domType = = QueryParams . SEARCHDOM_CLUSTERALL ) ) {
2009-01-12 16:06:22 +01:00
// this is a search using remote search threads. Also the local
// search thread is started as background process
if ( ( localSearchThread ! = null ) & & ( localSearchThread . isAlive ( ) ) ) {
// in case that the local search takes longer than some other
2009-06-04 22:58:47 +02:00
// remote search requests, wait that the local process terminates first
try { localSearchThread . join ( ) ; } catch ( InterruptedException e ) { }
2009-01-12 16:06:22 +01:00
}
2007-09-04 01:43:55 +02:00
}
2009-08-30 12:28:23 +02:00
return this . results . oneResult ( item ) ;
2007-09-04 01:43:55 +02:00
}
boolean secondarySearchStartet = false ;
2007-08-25 01:12:59 +02:00
2008-03-04 23:45:45 +01:00
void prepareSecondarySearch ( ) {
2008-02-01 00:40:47 +01:00
if ( secondarySearchStartet ) return ; // don't do this twice
2006-09-13 19:13:28 +02:00
2007-09-04 01:43:55 +02:00
if ( ( rcAbstracts = = null ) | | ( rcAbstracts . size ( ) ! = query . queryHashes . size ( ) ) ) return ; // secondary search not possible (yet)
this . secondarySearchStartet = true ;
2006-09-13 19:13:28 +02:00
2007-12-22 03:58:38 +01:00
/ *
2007-09-04 01:43:55 +02:00
// catch up index abstracts and join them; then call peers again to submit their urls
2009-05-08 21:26:36 +02:00
System . out . println ( " DEBUG-INDEXABSTRACT: " + rcAbstracts . size ( ) + " word references caught, " + query . queryHashes . size ( ) + " needed " ) ;
2007-09-04 01:43:55 +02:00
2006-09-13 19:13:28 +02:00
Iterator i = rcAbstracts . entrySet ( ) . iterator ( ) ;
Map . Entry entry ;
while ( i . hasNext ( ) ) {
entry = ( Map . Entry ) i . next ( ) ;
System . out . println ( " DEBUG-INDEXABSTRACT: hash " + ( String ) entry . getKey ( ) + " : " + ( ( query . queryHashes . contains ( ( String ) entry . getKey ( ) ) ) ? " NEEDED " : " NOT NEEDED " ) + " ; " + ( ( TreeMap ) entry . getValue ( ) ) . size ( ) + " entries " ) ;
}
2007-12-22 03:58:38 +01:00
* /
2009-01-30 16:33:00 +01:00
final TreeMap < String , String > abstractJoin = ( rcAbstracts . size ( ) = = query . queryHashes . size ( ) ) ? SetTools . joinConstructive ( rcAbstracts . values ( ) , true ) : new TreeMap < String , String > ( ) ;
2008-05-03 11:06:00 +02:00
if ( abstractJoin . size ( ) ! = 0 ) {
2007-12-22 03:58:38 +01:00
//System.out.println("DEBUG-INDEXABSTRACT: index abstracts delivered " + abstractJoin.size() + " additional results for secondary search");
2006-09-13 19:13:28 +02:00
// generate query for secondary search
2008-08-02 14:12:04 +02:00
final TreeMap < String , String > secondarySearchURLs = new TreeMap < String , String > ( ) ; // a (peerhash:urlhash-liststring) mapping
2008-01-18 18:14:02 +01:00
Iterator < Map . Entry < String , String > > i1 = abstractJoin . entrySet ( ) . iterator ( ) ;
Map . Entry < String , String > entry1 ;
2009-05-28 16:26:05 +02:00
String url , urls , peer , ps ;
final String mypeerhash = peers . mySeed ( ) . hash ;
2006-09-16 02:07:09 +02:00
boolean mypeerinvolved = false ;
2006-10-31 03:45:41 +01:00
int mypeercount ;
2006-09-13 19:13:28 +02:00
while ( i1 . hasNext ( ) ) {
2008-01-18 18:14:02 +01:00
entry1 = i1 . next ( ) ;
url = entry1 . getKey ( ) ;
2009-05-28 16:26:05 +02:00
ps = entry1 . getValue ( ) ;
2007-12-22 03:58:38 +01:00
//System.out.println("DEBUG-INDEXABSTRACT: url " + url + ": from peers " + peers);
2006-10-31 03:45:41 +01:00
mypeercount = 0 ;
2009-05-28 16:26:05 +02:00
for ( int j = 0 ; j < ps . length ( ) ; j = j + 12 ) {
peer = ps . substring ( j , j + 12 ) ;
2006-10-31 03:45:41 +01:00
if ( ( peer . equals ( mypeerhash ) ) & & ( mypeercount + + > 1 ) ) continue ;
//if (peers.indexOf(peer) < j) continue; // avoid doubles that may appear in the abstractJoin
2008-06-06 18:01:27 +02:00
urls = secondarySearchURLs . get ( peer ) ;
2006-09-13 19:13:28 +02:00
urls = ( urls = = null ) ? url : urls + url ;
secondarySearchURLs . put ( peer , urls ) ;
}
2006-10-31 03:45:41 +01:00
if ( mypeercount = = 1 ) mypeerinvolved = true ;
2006-09-13 19:13:28 +02:00
}
// compute words for secondary search and start the secondary searches
i1 = secondarySearchURLs . entrySet ( ) . iterator ( ) ;
String words ;
2006-09-16 02:07:09 +02:00
secondarySearchThreads = new yacySearch [ ( mypeerinvolved ) ? secondarySearchURLs . size ( ) - 1 : secondarySearchURLs . size ( ) ] ;
2006-09-13 19:13:28 +02:00
int c = 0 ;
while ( i1 . hasNext ( ) ) {
2008-01-18 18:14:02 +01:00
entry1 = i1 . next ( ) ;
peer = entry1 . getKey ( ) ;
2006-09-16 02:07:09 +02:00
if ( peer . equals ( mypeerhash ) ) continue ; // we dont need to ask ourself
2008-06-06 18:01:27 +02:00
urls = entry1 . getValue ( ) ;
2009-08-24 17:24:02 +02:00
words = rcAbstracts . wordsFromPeer ( peer , urls ) ;
2009-06-04 01:49:06 +02:00
assert words . length ( ) > = 12 : " words = " + words ;
2007-12-22 03:58:38 +01:00
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " has urls: " + urls);
//System.out.println("DEBUG-INDEXABSTRACT ***: peer " + peer + " from words: " + words);
2006-09-13 19:13:28 +02:00
secondarySearchThreads [ c + + ] = yacySearch . secondaryRemoteSearch (
2009-07-19 22:37:44 +02:00
words , " " , urls , indexSegment , peers , crawlResults , this . rankedCache , peer , Switchboard . urlBlacklist ,
2008-01-08 21:12:31 +01:00
query . ranking , query . constraint , preselectedPeerHashes ) ;
2006-09-13 19:13:28 +02:00
}
2008-05-03 11:06:00 +02:00
//} else {
//System.out.println("DEBUG-INDEXABSTRACT: no success using index abstracts from remote peers");
2006-09-13 19:13:28 +02:00
}
}
2008-08-02 14:12:04 +02:00
public void remove ( final String urlhash ) {
2007-08-26 20:18:35 +02:00
// removes the url hash reference from last search result
2007-09-04 01:43:55 +02:00
/*indexRWIEntry e =*/ this . rankedCache . remove ( urlhash ) ;
//assert e != null;
2007-08-26 20:18:35 +02:00
}
2006-09-13 19:13:28 +02:00
2009-08-27 17:19:48 +02:00
public ResultFetcher result ( ) {
2009-08-30 12:28:23 +02:00
return this . results ;
2009-08-26 17:59:55 +02:00
}
2005-10-10 02:32:15 +02:00
}