// search.java
// (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// You must compile this file with
// javac -classpath .:../../Classes search.java
// if the shell's current path is htroot/yacy
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;

import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.ranking.ScoreMap;
import net.yacy.cora.ranking.WeakPriorityBlockingQueue;
import net.yacy.cora.services.federated.yacy.CacheStrategy;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.data.word.WordReferenceFactory;
import net.yacy.kelondro.data.word.WordReferenceRow;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ByteBuffer;
import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.ISO639;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.peers.EventChannel;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
import net.yacy.search.index.Segments;
import net.yacy.search.query.AccessTracker;
import net.yacy.search.query.QueryParams;
import net.yacy.search.query.SearchEvent;
import net.yacy.search.query.SearchEventCache;
import net.yacy.search.ranking.RankingProfile;
import net.yacy.search.snippet.ContentDomain;
import net.yacy.search.snippet.ResultEntry;

import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
public final class search {
2005-04-07 21:19:42 +02:00
2009-07-19 22:37:44 +02:00
public static serverObjects respond ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
2005-10-05 15:14:18 +02:00
// return variable that accumulates replacements
2009-07-19 22:37:44 +02:00
final Switchboard sb = ( Switchboard ) env ;
2008-03-12 01:05:18 +01:00
sb . remoteSearchLastAccess = System . currentTimeMillis ( ) ;
2011-06-13 23:44:03 +02:00
2008-08-02 14:12:04 +02:00
final serverObjects prop = new serverObjects ( ) ;
2011-11-23 23:21:14 +01:00
// set nice default values for error cases
prop . put ( " searchtime " , " 0 " ) ;
prop . put ( " references " , " " ) ;
prop . put ( " joincount " , " 0 " ) ;
prop . put ( " linkcount " , " 0 " ) ;
prop . put ( " links " , " " ) ;
prop . put ( " indexcount " , " " ) ;
prop . put ( " indexabstract " , " " ) ;
2011-11-24 00:39:34 +01:00
prop . put ( " fwhop " , " " ) ; // hops (depth) of forwards that had been performed to construct this result
prop . put ( " fwsrc " , " " ) ; // peers that helped to construct this result
prop . put ( " fwrec " , " " ) ; // peers that would have helped to construct this result (recommendations)
2011-11-23 23:21:14 +01:00
if ( post = = null | | env = = null ) return prop ;
2011-10-04 11:06:24 +02:00
if ( ! Protocol . authentifyRequest ( post , env ) ) return prop ;
2009-07-19 22:37:44 +02:00
final String client = header . get ( HeaderFramework . CONNECTION_PROP_CLIENTIP ) ;
2005-04-07 21:19:42 +02:00
2008-03-12 01:05:18 +01:00
//System.out.println("yacy: search received request = " + post.toString());
final String oseed = post . get ( " myseed " , " " ) ; // complete seed of the requesting peer
// final String youare = post.get("youare", ""); // seed hash of the target peer, used for testing network stability
final String key = post . get ( " key " , " " ) ; // transmission key for response
final String query = post . get ( " query " , " " ) ; // a string of word hashes that shall be searched and combined
final String exclude = post . get ( " exclude " , " " ) ; // a string of word hashes that shall not be within the search result
2008-08-02 14:12:04 +02:00
final String urls = post . get ( " urls " , " " ) ; // a string of url hashes that are preselected for the search: no other may be returned
final String abstracts = post . get ( " abstracts " , " " ) ; // a string of word hashes for abstracts that shall be generated, or 'auto' (for maxcount-word), or '' (for none)
2008-03-12 01:05:18 +01:00
// final String fwdep = post.get("fwdep", ""); // forward depth. if "0" then peer may NOT ask another peer for more results
// final String fwden = post.get("fwden", ""); // forward deny, a list of seed hashes. They may NOT be target of forward hopping
2011-03-04 14:44:00 +01:00
final int count = Math . min ( ( int ) sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXCOUNT_DEFAULT , 100 ) , post . getInt ( " count " , 10 ) ) ; // maximum number of wanted results
2011-11-23 23:21:14 +01:00
final long maxtime = Math . min ( ( int ) sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXTIME_DEFAULT , 3000 ) , post . getLong ( " time " , 3000 ) ) ; // maximum waiting time
2008-03-12 01:05:18 +01:00
final int maxdist = post . getInt ( " maxdist " , Integer . MAX_VALUE ) ;
final String prefer = post . get ( " prefer " , " " ) ;
2011-11-26 14:40:33 +01:00
final String modifier = post . get ( " modifier " , " " ) . trim ( ) ;
2008-03-12 01:05:18 +01:00
final String contentdom = post . get ( " contentdom " , " text " ) ;
2011-05-06 00:37:06 +02:00
final String filter = post . get ( " filter " , " .* " ) ; // a filter on the url
final Pattern snippetPattern = Pattern . compile ( post . get ( " snippet " , " .* " ) ) ; // a filter on the snippet
2009-04-02 15:26:47 +02:00
String sitehash = post . get ( " sitehash " , " " ) ; if ( sitehash . length ( ) = = 0 ) sitehash = null ;
2009-06-09 01:30:12 +02:00
String authorhash = post . get ( " authorhash " , " " ) ; if ( authorhash . length ( ) = = 0 ) authorhash = null ;
2008-09-21 02:04:42 +02:00
String language = post . get ( " language " , " " ) ;
2009-10-11 02:12:19 +02:00
if ( language = = null | | language . length ( ) = = 0 | | ! ISO639 . exists ( language ) ) {
2008-09-21 02:04:42 +02:00
// take language from the user agent
String agent = header . get ( " User-Agent " ) ;
if ( agent = = null ) agent = System . getProperty ( " user.language " ) ;
2009-10-11 02:12:19 +02:00
language = ( agent = = null ) ? " en " : ISO639 . userAgentLanguageDetection ( agent ) ;
2008-10-01 22:20:39 +02:00
if ( language = = null ) language = " en " ;
2008-09-21 02:04:42 +02:00
}
2011-11-23 22:40:11 +01:00
final int partitions = post . getInt ( " partitions " , 30 ) ;
String profile = post . get ( " profile " , " " ) ; // remote profile hand-over
2008-03-12 01:05:18 +01:00
if ( profile . length ( ) > 0 ) profile = crypt . simpleDecode ( profile , null ) ;
//final boolean includesnippet = post.get("includesnippet", "false").equals("true");
2009-01-30 16:33:00 +01:00
Bitfield constraint = ( ( post . containsKey ( " constraint " ) ) & & ( post . get ( " constraint " , " " ) . length ( ) > 0 ) ) ? new Bitfield ( 4 , post . get ( " constraint " , " ______ " ) ) : null ;
2008-03-12 01:05:18 +01:00
if ( constraint ! = null ) {
// check bad handover parameter from older versions
boolean allon = true ;
for ( int i = 0 ; i < 32 ; i + + ) {
if ( ! constraint . get ( i ) ) { allon = false ; break ; }
}
if ( allon ) constraint = null ;
2008-02-05 17:32:10 +01:00
}
2008-03-12 01:05:18 +01:00
// final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
// Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME)); // read remote time
2006-10-10 14:22:16 +02:00
// test:
2011-01-28 11:54:13 +01:00
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Q (search for linux)
// http://localhost:8090/yacy/search.html?query=gh8DKIhGKXws (search for book)
// http://localhost:8090/yacy/search.html?query=UEhMGfGv2vOE (search for kernel)
// http://localhost:8090/yacy/search.html?query=ZX-LjaYo74PP (search for help)
// http://localhost:8090/yacy/search.html?query=uDqIalxDfM2a (search for mail)
// http://localhost:8090/yacy/search.html?query=4galTpdpDM5Qgh8DKIhGKXws&abstracts=auto (search for linux and book, generate abstract automatically)
// http://localhost:8090/yacy/search.html?query=&abstracts=4galTpdpDM5Q (only abstracts for linux)
2007-07-05 01:48:52 +02:00
2011-11-23 22:44:56 +01:00
if ( sb . isRobinsonMode ( ) & & ! sb . isPublicRobinson ( ) ) {
// if we are a robinson cluster, answer only if this client is known by our network definition
2008-03-12 01:05:18 +01:00
return prop ;
2007-04-24 17:11:12 +02:00
}
2011-06-13 23:44:03 +02:00
2008-05-17 02:11:35 +02:00
// check the search tracker
TreeSet < Long > trackerHandles = sb . remoteSearchTracker . get ( client ) ;
2010-09-14 15:01:18 +02:00
if ( trackerHandles = = null ) trackerHandles = new TreeSet < Long > ( ) ;
boolean block = false ;
synchronized ( trackerHandles ) {
if ( trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 3000 ) ) . size ( ) > 1 ) {
block = true ;
}
2011-11-23 23:21:14 +01:00
}
if ( ! block ) synchronized ( trackerHandles ) {
2010-09-14 15:01:18 +02:00
if ( trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 60000 ) ) . size ( ) > 12 ) {
block = true ;
2010-05-12 00:26:18 +02:00
}
2011-11-23 23:21:14 +01:00
}
if ( ! block ) synchronized ( trackerHandles ) {
2010-09-14 15:01:18 +02:00
if ( trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 600000 ) ) . size ( ) > 36 ) {
block = true ;
2010-05-12 00:26:18 +02:00
}
2008-05-17 02:11:35 +02:00
}
2011-11-23 23:21:14 +01:00
if ( block & & Domains . isLocal ( client , null ) ) block = false ; // check isLocal here to prevent dns lookup for client
2010-09-14 15:01:18 +02:00
if ( block ) {
return prop ;
}
2011-06-13 23:44:03 +02:00
2006-01-20 16:14:21 +01:00
// tell all threads to do nothing for a specific time
2010-09-14 14:38:05 +02:00
sb . intermissionAllThreads ( 100 ) ;
2006-01-20 16:14:21 +01:00
2010-09-14 11:06:27 +02:00
EventTracker . delete ( EventTracker . EClass . SEARCH ) ;
2010-10-11 14:44:07 +02:00
final HandleSet abstractSet = ( abstracts . length ( ) = = 0 | | abstracts . equals ( " auto " ) ) ? null : QueryParams . hashes2Set ( abstracts ) ;
2011-06-13 23:44:03 +02:00
2006-01-20 16:14:21 +01:00
// store accessing peer
2011-10-04 11:06:24 +02:00
Seed remoteSeed ;
2010-10-26 17:00:22 +02:00
try {
2011-10-04 11:06:24 +02:00
remoteSeed = Seed . genRemoteSeed ( oseed , key , false , client ) ;
2011-06-13 23:44:03 +02:00
} catch ( final IOException e ) {
2011-10-04 11:06:24 +02:00
Network . log . logInfo ( " yacy.search: access with bad seed: " + e . getMessage ( ) ) ;
2010-10-26 17:00:22 +02:00
remoteSeed = null ;
}
2009-05-28 16:26:05 +02:00
if ( sb . peers = = null ) {
2011-10-04 11:06:24 +02:00
Network . log . logSevere ( " yacy.search: seed cache not initialized " ) ;
2005-04-07 21:19:42 +02:00
} else {
2009-05-28 16:26:05 +02:00
sb . peers . peerActions . peerArrival ( remoteSeed , true ) ;
2008-02-06 20:00:18 +01:00
}
2006-01-20 16:14:21 +01:00
// prepare search
2010-04-15 15:22:59 +02:00
final HandleSet queryhashes = QueryParams . hashes2Set ( query ) ;
final HandleSet excludehashes = ( exclude . length ( ) = = 0 ) ? new HandleSet ( WordReferenceRow . urlEntryRow . primaryKeyLength , WordReferenceRow . urlEntryRow . objectOrder , 0 ) : QueryParams . hashes2Set ( exclude ) ;
2005-10-05 15:14:18 +02:00
final long timestamp = System . currentTimeMillis ( ) ;
2011-06-13 23:44:03 +02:00
2008-03-12 01:05:18 +01:00
// prepare a search profile
2009-11-19 00:56:05 +01:00
final RankingProfile rankingProfile = ( profile . length ( ) = = 0 ) ? new RankingProfile ( ContentDomain . contentdomParser ( contentdom ) ) : new RankingProfile ( " " , profile ) ;
2011-06-13 23:44:03 +02:00
2006-10-10 14:22:16 +02:00
// prepare an abstract result
2009-11-18 16:13:06 +01:00
final StringBuilder indexabstract = new StringBuilder ( 6000 ) ;
2007-07-31 12:00:17 +02:00
int indexabstractContainercount = 0 ;
2006-09-14 00:19:34 +02:00
int joincount = 0 ;
2009-07-09 00:14:57 +02:00
QueryParams theQuery = null ;
2009-08-24 17:24:02 +02:00
SearchEvent theSearch = null ;
2010-10-04 13:54:48 +02:00
ArrayList < WeakPriorityBlockingQueue . Element < ResultEntry > > accu = null ;
2010-10-12 03:23:49 +02:00
if ( query . length ( ) = = 0 & & abstractSet ! = null ) {
2006-10-10 14:22:16 +02:00
// this is _not_ a normal search, only a request for index abstracts
2011-06-13 23:44:03 +02:00
final Segment indexSegment = sb . indexSegments . segment ( Segments . Process . PUBLIC ) ;
2009-07-09 00:14:57 +02:00
theQuery = new QueryParams (
2009-04-02 15:26:47 +02:00
null ,
abstractSet ,
2010-04-15 15:22:59 +02:00
new HandleSet ( WordReferenceRow . urlEntryRow . primaryKeyLength , WordReferenceRow . urlEntryRow . objectOrder , 0 ) ,
2009-04-02 15:26:47 +02:00
null ,
2011-05-05 02:25:14 +02:00
snippetPattern ,
2009-06-22 14:25:18 +02:00
null ,
2011-11-26 14:40:33 +01:00
modifier ,
2009-04-02 15:26:47 +02:00
maxdist ,
prefer ,
2009-11-19 00:56:05 +01:00
ContentDomain . contentdomParser ( contentdom ) ,
2009-04-02 15:26:47 +02:00
language ,
2009-06-07 23:48:01 +02:00
" " , // no navigation
2011-06-13 23:44:03 +02:00
CacheStrategy . CACHEONLY ,
2009-04-02 15:26:47 +02:00
count ,
0 ,
filter ,
2011-11-17 02:05:45 +01:00
QueryParams . Searchdom . LOCAL ,
2009-04-02 15:26:47 +02:00
- 1 ,
null ,
false ,
2011-06-13 23:44:03 +02:00
sitehash ,
2009-06-09 01:30:12 +02:00
authorhash ,
2009-10-11 02:12:19 +02:00
DigestURI . TLD_any_zone_filter ,
2009-04-02 15:26:47 +02:00
client ,
2009-11-24 12:13:11 +01:00
false ,
indexSegment ,
2010-10-18 10:09:59 +02:00
rankingProfile ,
2011-01-22 10:46:00 +01:00
header . get ( RequestHeader . USER_AGENT , " " ) ,
false
2009-11-24 12:13:11 +01:00
) ;
2011-10-04 11:06:24 +02:00
Network . log . logInfo ( " INIT HASH SEARCH (abstracts only): " + QueryParams . anonymizedQueryHashes ( theQuery . queryHashes ) + " - " + theQuery . displayResults ( ) + " links " ) ;
2006-10-10 14:22:16 +02:00
2008-08-02 14:12:04 +02:00
final long timer = System . currentTimeMillis ( ) ;
2009-05-29 12:03:35 +02:00
//final Map<byte[], ReferenceContainer<WordReference>>[] containers = sb.indexSegment.index().searchTerm(theQuery.queryHashes, theQuery.excludeHashes, plasmaSearchQuery.hashes2StringSet(urls));
2010-04-25 23:37:36 +02:00
final TreeMap < byte [ ] , ReferenceContainer < WordReference > > incc = indexSegment . termIndex ( ) . searchConjunction ( theQuery . queryHashes , QueryParams . hashes2Handles ( urls ) ) ;
2011-06-13 23:44:03 +02:00
2011-07-14 09:07:06 +02:00
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch ( theQuery . id ( true ) , SearchEvent . Type . COLLECTION , " " , incc . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
2009-05-29 12:03:35 +02:00
if ( incc ! = null ) {
final Iterator < Map . Entry < byte [ ] , ReferenceContainer < WordReference > > > ci = incc . entrySet ( ) . iterator ( ) ;
2009-04-16 17:29:00 +02:00
Map . Entry < byte [ ] , ReferenceContainer < WordReference > > entry ;
byte [ ] wordhash ;
2006-10-10 14:22:16 +02:00
while ( ci . hasNext ( ) ) {
2008-01-11 01:12:01 +01:00
entry = ci . next ( ) ;
wordhash = entry . getKey ( ) ;
2009-04-15 08:34:27 +02:00
final ReferenceContainer < WordReference > container = entry . getValue ( ) ;
2007-07-31 12:00:17 +02:00
indexabstractContainercount + = container . size ( ) ;
2011-03-15 02:03:35 +01:00
indexabstract . append ( " indexabstract. " ) ;
2011-05-27 10:24:54 +02:00
indexabstract . append ( ASCII . String ( wordhash ) ) ;
2011-03-15 02:03:35 +01:00
indexabstract . append ( " = " ) ;
2011-05-16 00:57:31 +02:00
indexabstract . append ( WordReferenceFactory . compressIndex ( container , null , 1000 ) . toString ( ) ) ;
2011-03-15 02:03:35 +01:00
indexabstract . append ( serverCore . CRLF_STRING ) ;
2006-09-12 02:42:42 +02:00
}
2006-09-06 19:51:28 +02:00
}
2011-06-13 23:44:03 +02:00
2007-10-24 23:38:19 +02:00
prop . put ( " indexcount " , " " ) ;
prop . put ( " joincount " , " 0 " ) ;
prop . put ( " references " , " " ) ;
2011-06-13 23:44:03 +02:00
2006-10-10 14:22:16 +02:00
} else {
// retrieve index containers from search request
2009-07-09 00:14:57 +02:00
theQuery = new QueryParams (
2011-06-13 23:44:03 +02:00
null ,
queryhashes ,
excludehashes ,
2011-05-05 02:25:14 +02:00
null ,
snippetPattern ,
2009-06-22 14:25:18 +02:00
null ,
2011-11-26 14:40:33 +01:00
modifier ,
2011-06-13 23:44:03 +02:00
maxdist ,
2009-11-24 12:13:11 +01:00
prefer ,
ContentDomain . contentdomParser ( contentdom ) ,
2009-06-07 23:48:01 +02:00
language ,
" " , // no navigation
2011-06-13 23:44:03 +02:00
CacheStrategy . CACHEONLY ,
count ,
0 ,
filter ,
2011-11-17 02:05:45 +01:00
QueryParams . Searchdom . LOCAL ,
2011-06-13 23:44:03 +02:00
- 1 ,
constraint ,
2009-04-02 15:26:47 +02:00
false ,
2009-06-09 01:30:12 +02:00
sitehash ,
authorhash ,
2009-10-11 02:12:19 +02:00
DigestURI . TLD_any_zone_filter ,
2011-06-13 23:44:03 +02:00
client ,
2009-11-24 12:13:11 +01:00
false ,
sb . indexSegments . segment ( Segments . Process . PUBLIC ) ,
2010-10-18 10:09:59 +02:00
rankingProfile ,
2011-01-22 10:46:00 +01:00
header . get ( RequestHeader . USER_AGENT , " " ) ,
false
2009-11-24 12:13:11 +01:00
) ;
2011-10-04 11:06:24 +02:00
Network . log . logInfo ( " INIT HASH SEARCH (query- " + abstracts + " ): " + QueryParams . anonymizedQueryHashes ( theQuery . queryHashes ) + " - " + theQuery . displayResults ( ) + " links " ) ;
EventChannel . channels ( EventChannel . REMOTESEARCH ) . addMessage ( new RSSMessage ( " Remote Search Request from " + ( ( remoteSeed = = null ) ? " unknown " : remoteSeed . getName ( ) ) , QueryParams . anonymizedQueryHashes ( theQuery . queryHashes ) , " " ) ) ;
2011-06-13 23:44:03 +02:00
2008-01-08 21:12:31 +01:00
// make event
2011-03-04 14:44:00 +01:00
theSearch = SearchEventCache . getEvent ( theQuery , sb . peers , sb . tables , null , abstracts . length ( ) > 0 , sb . loader , count , maxtime , ( int ) sb . getConfigLong ( SwitchboardConstants . DHT_BURST_ROBINSON , 0 ) , ( int ) sb . getConfigLong ( SwitchboardConstants . DHT_BURST_MULTIWORD , 0 ) ) ;
2011-06-13 23:44:03 +02:00
2006-10-10 14:22:16 +02:00
// set statistic details of search result and find best result index set
2011-05-07 01:04:27 +02:00
joincount = theSearch . getRankingResult ( ) . getLocalIndexCount ( ) - theSearch . getRankingResult ( ) . getMissCount ( ) - theSearch . getRankingResult ( ) . getSortOutCount ( ) ;
2010-10-11 14:44:07 +02:00
prop . put ( " joincount " , Integer . toString ( joincount ) ) ;
if ( joincount ! = 0 ) {
2011-03-04 14:44:00 +01:00
accu = theSearch . result ( ) . completeResults ( maxtime ) ;
2010-10-11 14:44:07 +02:00
}
2010-12-02 13:19:59 +01:00
if ( joincount < = 0 | | abstracts . length ( ) = = 0 ) {
2007-10-24 23:38:19 +02:00
prop . put ( " indexcount " , " " ) ;
2006-09-14 00:19:34 +02:00
} else {
2007-08-28 14:15:46 +02:00
// attach information about index abstracts
2009-11-18 16:13:06 +01:00
final StringBuilder indexcount = new StringBuilder ( 6000 ) ;
2009-04-16 17:29:00 +02:00
Map . Entry < byte [ ] , Integer > entry ;
2009-08-26 17:59:55 +02:00
final Iterator < Map . Entry < byte [ ] , Integer > > i = theSearch . abstractsCount ( ) ;
2007-08-28 14:15:46 +02:00
while ( i . hasNext ( ) ) {
2008-01-11 01:12:01 +01:00
entry = i . next ( ) ;
2011-11-24 23:45:31 +01:00
indexcount . append ( " indexcount. " ) . append ( ASCII . String ( entry . getKey ( ) ) ) . append ( '=' ) . append ( ( entry . getValue ( ) ) . toString ( ) ) . append ( serverCore . CRLF_STRING ) ;
2007-08-28 14:15:46 +02:00
}
if ( abstractSet ! = null ) {
// if a specific index-abstract is demanded, attach it here
2009-04-16 17:29:00 +02:00
final Iterator < byte [ ] > j = abstractSet . iterator ( ) ;
byte [ ] wordhash ;
2008-01-11 01:12:01 +01:00
while ( j . hasNext ( ) ) {
2008-06-06 18:01:27 +02:00
wordhash = j . next ( ) ;
2009-08-26 17:59:55 +02:00
indexabstractContainercount + = theSearch . abstractsCount ( wordhash ) ;
2011-05-27 10:24:54 +02:00
indexabstract . append ( " indexabstract. " ) . append ( ASCII . String ( wordhash ) ) . append ( " = " ) . append ( theSearch . abstractsString ( wordhash ) ) . append ( serverCore . CRLF_STRING ) ;
2006-10-10 14:22:16 +02:00
}
}
2007-10-24 23:38:19 +02:00
prop . put ( " indexcount " , indexcount . toString ( ) ) ;
2011-06-13 23:44:03 +02:00
2006-10-10 14:22:16 +02:00
// generate compressed index for maxcounthash
// this is not needed if the search is restricted to specific
// urls, because it is a re-search
2009-08-26 17:59:55 +02:00
if ( ( theSearch . getAbstractsMaxCountHash ( ) = = null ) | | ( urls . length ( ) ! = 0 ) | | ( queryhashes . size ( ) < = 1 ) | | ( abstracts . length ( ) = = 0 ) ) {
2007-10-24 23:38:19 +02:00
prop . put ( " indexabstract " , " " ) ;
2006-10-10 14:22:16 +02:00
} else if ( abstracts . equals ( " auto " ) ) {
2007-07-31 12:00:17 +02:00
// automatically attach the index abstract for the index that has the most references. This should be our target dht position
2009-08-26 17:59:55 +02:00
indexabstractContainercount + = theSearch . abstractsCount ( theSearch . getAbstractsMaxCountHash ( ) ) ;
2011-05-27 10:24:54 +02:00
indexabstract . append ( " indexabstract. " ) . append ( ASCII . String ( theSearch . getAbstractsMaxCountHash ( ) ) ) . append ( " = " ) . append ( theSearch . abstractsString ( theSearch . getAbstractsMaxCountHash ( ) ) ) . append ( serverCore . CRLF_STRING ) ;
if ( ( theSearch . getAbstractsNearDHTHash ( ) ! = null ) & & ( ! ( ByteBuffer . equals ( theSearch . getAbstractsNearDHTHash ( ) , theSearch . getAbstractsMaxCountHash ( ) ) ) ) ) {
2007-07-31 12:00:17 +02:00
// in case that the neardhthash is different from the maxcounthash attach also the neardhthash-container
2009-08-26 17:59:55 +02:00
indexabstractContainercount + = theSearch . abstractsCount ( theSearch . getAbstractsNearDHTHash ( ) ) ;
2011-05-27 10:24:54 +02:00
indexabstract . append ( " indexabstract. " ) . append ( ASCII . String ( theSearch . getAbstractsNearDHTHash ( ) ) ) . append ( " = " ) . append ( theSearch . abstractsString ( theSearch . getAbstractsNearDHTHash ( ) ) ) . append ( serverCore . CRLF_STRING ) ;
2006-10-10 14:22:16 +02:00
}
//System.out.println("DEBUG-ABSTRACTGENERATION: maxcounthash = " + maxcounthash);
//System.out.println("DEBUG-ABSTRACTGENERATION: neardhthash = "+ neardhthash);
//yacyCore.log.logFine("DEBUG HASH SEARCH: " + indexabstract);
2006-09-14 00:19:34 +02:00
}
2006-09-12 02:42:42 +02:00
}
2010-02-24 14:53:55 +01:00
if ( partitions > 0 ) sb . searchQueriesGlobal + = 1d / partitions ; // increase query counter
2011-06-13 23:44:03 +02:00
2007-08-28 14:15:46 +02:00
// prepare reference hints
2008-08-02 14:12:04 +02:00
final long timer = System . currentTimeMillis ( ) ;
2011-06-13 23:44:03 +02:00
final ScoreMap < String > topicNavigator = theSearch . getTopicNavigator ( 5 ) ;
2009-11-18 16:13:06 +01:00
final StringBuilder refstr = new StringBuilder ( 6000 ) ;
2011-06-13 23:44:03 +02:00
final Iterator < String > navigatorIterator = topicNavigator . keys ( false ) ;
2010-10-16 01:45:12 +02:00
int i = 0 ;
String name ;
while ( i < 5 & & navigatorIterator . hasNext ( ) ) {
name = navigatorIterator . next ( ) ;
refstr . append ( " , " ) . append ( name ) ;
i + + ;
2007-09-08 13:50:19 +02:00
}
2007-10-24 23:38:19 +02:00
prop . put ( " references " , ( refstr . length ( ) > 0 ) ? refstr . substring ( 1 ) : refstr . toString ( ) ) ;
2011-07-14 09:07:06 +02:00
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch ( theQuery . id ( true ) , SearchEvent . Type . REFERENCECOLLECTION , " " , i , System . currentTimeMillis ( ) - timer ) , false ) ;
2006-09-11 00:36:47 +02:00
}
2007-10-24 23:38:19 +02:00
prop . put ( " indexabstract " , indexabstract . toString ( ) ) ;
2011-06-13 23:44:03 +02:00
2008-02-06 20:00:18 +01:00
// prepare result
2011-03-15 02:03:35 +01:00
if ( joincount = = 0 | | accu = = null | | accu . isEmpty ( ) ) {
2011-06-13 23:44:03 +02:00
2008-03-12 01:05:18 +01:00
// no results
prop . put ( " links " , " " ) ;
prop . put ( " linkcount " , " 0 " ) ;
prop . put ( " references " , " " ) ;
} else {
2006-09-06 19:51:28 +02:00
// result is a List of urlEntry elements
2008-08-02 14:12:04 +02:00
final long timer = System . currentTimeMillis ( ) ;
2009-11-18 16:13:06 +01:00
final StringBuilder links = new StringBuilder ( 6000 ) ;
2006-12-11 02:31:23 +01:00
String resource = null ;
2010-10-04 13:54:48 +02:00
WeakPriorityBlockingQueue . Element < ResultEntry > entry ;
2007-08-28 14:15:46 +02:00
for ( int i = 0 ; i < accu . size ( ) ; i + + ) {
2008-02-21 11:06:57 +01:00
entry = accu . get ( i ) ;
2010-09-09 17:30:25 +02:00
resource = entry . getElement ( ) . resource ( ) ;
2006-12-11 02:31:23 +01:00
if ( resource ! = null ) {
2007-12-14 20:17:54 +01:00
links . append ( " resource " ) . append ( i ) . append ( '=' ) . append ( resource ) . append ( serverCore . CRLF_STRING ) ;
2006-01-20 16:14:21 +01:00
}
}
2011-01-11 23:58:14 +01:00
theQuery . transmitcount = accu . size ( ) + 1 ;
2007-10-24 23:38:19 +02:00
prop . put ( " links " , links . toString ( ) ) ;
2007-08-28 14:15:46 +02:00
prop . put ( " linkcount " , accu . size ( ) ) ;
2011-07-14 09:07:06 +02:00
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch ( theQuery . id ( true ) , SearchEvent . Type . RESULTLIST , " " , accu . size ( ) , System . currentTimeMillis ( ) - timer ) , false ) ;
2006-01-20 16:14:21 +01:00
}
2011-06-13 23:44:03 +02:00
2007-09-04 11:04:47 +02:00
// prepare search statistics
2010-09-14 17:27:27 +02:00
theQuery . remotepeer = client = = null ? null : sb . peers . lookupByIP ( Domains . dnsResolve ( client ) , true , false , false ) ;
2010-12-02 13:19:59 +01:00
theQuery . resultcount = ( theSearch = = null ) ? 0 : joincount ;
2008-02-18 00:35:48 +01:00
theQuery . searchtime = System . currentTimeMillis ( ) - timestamp ;
2009-08-26 17:59:55 +02:00
theQuery . urlretrievaltime = ( theSearch = = null ) ? 0 : theSearch . result ( ) . getURLRetrievalTime ( ) ;
theQuery . snippetcomputationtime = ( theSearch = = null ) ? 0 : theSearch . result ( ) . getSnippetComputationTime ( ) ;
2010-12-29 02:54:27 +01:00
AccessTracker . add ( AccessTracker . Location . remote , theQuery ) ;
2011-06-13 23:44:03 +02:00
2008-05-17 02:11:35 +02:00
// update the search tracker
2010-05-12 00:26:18 +02:00
synchronized ( trackerHandles ) {
2010-12-29 02:54:27 +01:00
trackerHandles . add ( theQuery . time ) ; // thats the time when the handle was created
2010-05-12 00:26:18 +02:00
// we don't need too much entries in the list; remove superfluous
while ( trackerHandles . size ( ) > 36 ) if ( ! trackerHandles . remove ( trackerHandles . first ( ) ) ) break ;
}
2008-05-17 02:11:35 +02:00
sb . remoteSearchTracker . put ( client , trackerHandles ) ;
2011-05-26 16:35:32 +02:00
if ( MemoryControl . shortStatus ( ) ) sb . remoteSearchTracker . clear ( ) ;
2011-06-13 23:44:03 +02:00
2006-01-20 16:14:21 +01:00
// log
2011-10-04 11:06:24 +02:00
Network . log . logInfo ( " EXIT HASH SEARCH: " +
2009-07-09 00:14:57 +02:00
QueryParams . anonymizedQueryHashes ( theQuery . queryHashes ) + " - " + joincount + " links found, " +
2007-07-31 12:00:17 +02:00
prop . get ( " linkcount " , " ? " ) + " links selected, " +
2007-09-05 11:01:35 +02:00
indexabstractContainercount + " index abstracts, " +
2007-07-31 12:00:17 +02:00
( System . currentTimeMillis ( ) - timestamp ) + " milliseconds " ) ;
2011-06-13 23:44:03 +02:00
2007-10-24 23:38:19 +02:00
prop . put ( " searchtime " , System . currentTimeMillis ( ) - timestamp ) ;
2005-10-05 15:14:18 +02:00
2011-03-15 02:03:35 +01:00
final int links = prop . getInt ( " linkcount " , 0 ) ;
2009-05-28 16:26:05 +02:00
sb . peers . mySeed ( ) . incSI ( links ) ;
sb . peers . mySeed ( ) . incSU ( links ) ;
2005-10-07 17:04:03 +02:00
return prop ;
2005-04-07 21:19:42 +02:00
}
2008-03-12 01:05:18 +01:00
2007-01-16 15:07:54 +01:00
}