2006-09-19 12:44:45 +02:00
// yacysearch.java
// -----------------------
// part of the AnomicHTTPD caching proxy
2008-07-20 19:14:51 +02:00
// (C) by Michael Peter Christen; mc@yacy.net
2006-09-19 12:44:45 +02:00
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// You must compile this file with
// javac -classpath .:../classes yacysearch.java
// if the shell's current path is HTROOT
2009-03-16 01:18:37 +01:00
import java.io.IOException ;
2006-09-19 12:44:45 +02:00
import java.util.HashMap ;
2009-06-12 22:36:03 +02:00
import java.util.Iterator ;
2011-03-09 00:27:41 +01:00
import java.util.Map ;
2010-11-28 03:57:31 +01:00
import java.util.SortedSet ;
2006-09-19 12:44:45 +02:00
import java.util.TreeSet ;
2011-03-23 01:48:19 +01:00
import java.util.regex.Pattern ;
import java.util.regex.PatternSyntaxException ;
2006-09-19 12:44:45 +02:00
2011-05-27 10:24:54 +02:00
import net.yacy.cora.document.ASCII ;
2010-05-25 14:54:57 +02:00
import net.yacy.cora.document.RSSMessage ;
2011-03-07 21:36:40 +01:00
import net.yacy.cora.document.UTF8 ;
2010-08-23 14:32:02 +02:00
import net.yacy.cora.protocol.Domains ;
import net.yacy.cora.protocol.HeaderFramework ;
import net.yacy.cora.protocol.RequestHeader ;
2010-11-02 17:28:40 +01:00
import net.yacy.cora.protocol.ResponseHeader ;
2011-06-13 23:44:03 +02:00
import net.yacy.cora.services.federated.yacy.CacheStrategy ;
2009-10-18 02:53:43 +02:00
import net.yacy.document.Condenser ;
import net.yacy.document.Document ;
2011-02-12 01:01:40 +01:00
import net.yacy.document.LibraryProvider ;
2010-06-29 21:20:45 +02:00
import net.yacy.document.Parser ;
2009-10-20 00:34:44 +02:00
import net.yacy.document.geolocalization.Location ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.meta.DigestURI ;
import net.yacy.kelondro.data.meta.URIMetadataRow ;
import net.yacy.kelondro.data.word.Word ;
2010-04-15 15:22:59 +02:00
import net.yacy.kelondro.index.HandleSet ;
2009-10-10 01:13:30 +02:00
import net.yacy.kelondro.logging.Log ;
2009-10-10 01:22:22 +02:00
import net.yacy.kelondro.order.Bitfield ;
2009-12-08 15:25:51 +01:00
import net.yacy.kelondro.util.EventTracker ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.Formatter ;
2011-06-13 23:44:03 +02:00
import net.yacy.kelondro.util.ISO639 ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.MemoryControl ;
import net.yacy.kelondro.util.SetTools ;
2009-09-01 15:04:35 +02:00
import de.anomic.data.DidYouMean ;
2011-04-02 01:32:40 +02:00
import de.anomic.data.UserDB ;
2010-12-29 02:54:27 +01:00
import de.anomic.search.AccessTracker ;
2009-11-19 00:56:05 +01:00
import de.anomic.search.ContentDomain ;
2009-07-09 00:14:57 +02:00
import de.anomic.search.QueryParams ;
2009-06-16 23:45:40 +02:00
import de.anomic.search.RankingProfile ;
2009-08-24 17:24:02 +02:00
import de.anomic.search.SearchEvent ;
import de.anomic.search.SearchEventCache ;
2009-10-11 02:12:19 +02:00
import de.anomic.search.Segment ;
import de.anomic.search.Segments ;
2009-07-19 22:37:44 +02:00
import de.anomic.search.Switchboard ;
import de.anomic.search.SwitchboardConstants ;
2006-09-19 12:44:45 +02:00
import de.anomic.server.serverCore ;
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
2010-11-02 17:28:40 +01:00
import de.anomic.server.servletProperties ;
2011-06-13 23:44:03 +02:00
import de.anomic.yacy.yacyChannel ;
2007-01-18 11:42:36 +01:00
import de.anomic.yacy.yacyNewsPool ;
2009-10-20 00:34:44 +02:00
import de.anomic.yacy.graphics.ProfilingGraph ;
2006-09-19 12:44:45 +02:00
public class yacysearch {
2009-07-19 22:37:44 +02:00
public static serverObjects respond ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
final Switchboard sb = ( Switchboard ) env ;
2007-12-06 22:53:17 +01:00
sb . localSearchLastAccess = System . currentTimeMillis ( ) ;
2011-06-13 23:44:03 +02:00
2008-08-02 14:12:04 +02:00
final boolean searchAllowed = sb . getConfigBool ( " publicSearchpage " , true ) | | sb . verifyAuthentication ( header , false ) ;
2011-06-13 23:44:03 +02:00
2011-04-02 01:32:40 +02:00
boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
if ( ! authenticated ) {
final UserDB . Entry user = sb . userDB . getUser ( header ) ;
authenticated = ( user ! = null & & user . hasRight ( UserDB . AccessRight . EXTENDED_SEARCH_RIGHT ) ) ;
}
2010-09-22 22:50:02 +02:00
final boolean localhostAccess = sb . accessFromLocalhost ( header ) ;
2011-03-08 23:37:17 +01:00
final String promoteSearchPageGreeting =
( env . getConfigBool ( SwitchboardConstants . GREETING_NETWORK_NAME , false ) ) ?
env . getConfig ( " network.unit.description " , " " ) :
env . getConfig ( SwitchboardConstants . GREETING , " " ) ;
2009-07-19 22:37:44 +02:00
final String client = header . get ( HeaderFramework . CONNECTION_PROP_CLIENTIP ) ; // the search client who initiated the search
2011-06-13 23:44:03 +02:00
2008-01-08 21:12:31 +01:00
// get query
2011-03-08 23:37:17 +01:00
final String originalquerystring = ( post = = null ) ? " " : post . get ( " query " , post . get ( " search " , " " ) ) . trim ( ) ;
2010-12-28 02:57:05 +01:00
String querystring = originalquerystring . replace ( '+' , ' ' ) . replace ( '*' , ' ' ) . trim ( ) ;
2011-06-13 23:44:03 +02:00
CacheStrategy snippetFetchStrategy = ( post = = null ) ? null : CacheStrategy . parse ( post . get ( " verify " , " cacheonly " ) ) ;
2010-11-02 17:28:40 +01:00
final servletProperties prop = new servletProperties ( ) ;
2010-12-08 11:50:23 +01:00
prop . put ( " topmenu " , sb . getConfigBool ( " publicTopmenu " , true ) ? 1 : 0 ) ;
2011-06-13 23:44:03 +02:00
2009-10-09 16:44:20 +02:00
// get segment
Segment indexSegment = null ;
if ( post ! = null & & post . containsKey ( " segment " ) ) {
2011-03-08 23:37:17 +01:00
final String segmentName = post . get ( " segment " ) ;
2009-10-09 16:44:20 +02:00
if ( sb . indexSegments . segmentExist ( segmentName ) ) {
indexSegment = sb . indexSegments . segment ( segmentName ) ;
}
} else {
// take default segment
indexSegment = sb . indexSegments . segment ( Segments . Process . PUBLIC ) ;
}
2011-06-13 23:44:03 +02:00
2011-05-26 14:35:24 +02:00
final String EXT = header . get ( " EXT " , " " ) ;
final boolean rss = EXT . equals ( " rss " ) ;
final boolean json = EXT . equals ( " json " ) ;
2009-04-15 13:22:43 +02:00
prop . put ( " promoteSearchPageGreeting " , promoteSearchPageGreeting ) ;
2009-07-19 22:37:44 +02:00
prop . put ( " promoteSearchPageGreeting.homepage " , sb . getConfig ( SwitchboardConstants . GREETING_HOMEPAGE , " " ) ) ;
prop . put ( " promoteSearchPageGreeting.smallImage " , sb . getConfig ( SwitchboardConstants . GREETING_SMALL_IMAGE , " " ) ) ;
2009-10-09 16:44:20 +02:00
if ( post = = null | | indexSegment = = null | | env = = null | | ! searchAllowed ) {
2006-09-19 12:44:45 +02:00
// we create empty entries for template strings
2007-10-24 23:38:19 +02:00
prop . put ( " searchagain " , " 0 " ) ;
2007-04-26 16:28:57 +02:00
prop . put ( " former " , " " ) ;
2009-04-15 13:22:43 +02:00
prop . put ( " count " , " 10 " ) ;
prop . put ( " offset " , " 0 " ) ;
prop . put ( " resource " , " global " ) ;
prop . put ( " urlmaskfilter " , ( post = = null ) ? " .* " : post . get ( " urlmaskfilter " , " .* " ) ) ;
prop . put ( " prefermaskfilter " , ( post = = null ) ? " " : post . get ( " prefermaskfilter " , " " ) ) ;
prop . put ( " tenant " , ( post = = null ) ? " " : post . get ( " tenant " , " " ) ) ;
prop . put ( " indexof " , " off " ) ;
prop . put ( " constraint " , " " ) ;
prop . put ( " cat " , " href " ) ;
prop . put ( " depth " , " 0 " ) ;
2011-03-21 08:50:34 +01:00
prop . put ( " search.verify " , ( post = = null ) ? sb . getConfig ( " search.verify " , " iffresh " ) : post . get ( " verify " , " iffresh " ) ) ;
prop . put ( " search.navigation " , ( post = = null ) ? sb . getConfig ( " search.navigation " , " all " ) : post . get ( " nav " , " all " ) ) ;
2009-04-15 13:22:43 +02:00
prop . put ( " contentdom " , " text " ) ;
prop . put ( " contentdomCheckText " , " 1 " ) ;
prop . put ( " contentdomCheckAudio " , " 0 " ) ;
prop . put ( " contentdomCheckVideo " , " 0 " ) ;
prop . put ( " contentdomCheckImage " , " 0 " ) ;
prop . put ( " contentdomCheckApp " , " 0 " ) ;
2007-10-24 23:38:19 +02:00
prop . put ( " excluded " , " 0 " ) ;
2007-09-06 15:26:38 +02:00
prop . put ( " results " , " " ) ;
2007-10-24 23:38:19 +02:00
prop . put ( " resultTable " , " 0 " ) ;
prop . put ( " num-results " , searchAllowed ? " 0 " : " 4 " ) ;
2009-02-03 14:04:02 +01:00
prop . put ( " num-results_totalcount " , 0 ) ;
prop . put ( " num-results_offset " , 0 ) ;
prop . put ( " num-results_itemsPerPage " , 10 ) ;
2009-09-04 16:32:36 +02:00
prop . put ( " geoinfo " , " 0 " ) ;
2009-02-03 14:04:02 +01:00
prop . put ( " rss_queryenc " , " " ) ;
2009-11-06 20:13:35 +01:00
prop . put ( " meanCount " , 5 ) ;
2006-09-19 12:44:45 +02:00
return prop ;
}
2011-06-13 23:44:03 +02:00
2009-03-07 11:18:47 +01:00
// check for JSONP
if ( post . containsKey ( " callback " ) ) {
final String jsonp = post . get ( " callback " ) + " ([ " ;
prop . put ( " jsonp-start " , jsonp ) ;
prop . put ( " jsonp-end " , " ]) " ) ;
} else {
prop . put ( " jsonp-start " , " " ) ;
prop . put ( " jsonp-end " , " " ) ;
}
2011-06-13 23:44:03 +02:00
2010-11-02 17:28:40 +01:00
// Adding CORS Access header for yacysearch.rss output
if ( rss ) {
final ResponseHeader outgoingHeader = new ResponseHeader ( ) ;
2010-11-27 10:16:16 +01:00
outgoingHeader . put ( HeaderFramework . CORS_ALLOW_ORIGIN , " * " ) ;
2010-11-02 17:28:40 +01:00
prop . setOutgoingHeader ( outgoingHeader ) ;
}
2011-06-13 23:44:03 +02:00
2006-12-01 03:45:49 +01:00
// collect search attributes
2011-03-08 23:37:17 +01:00
final boolean newsearch = post . hasValue ( " query " ) & & post . hasValue ( " former " ) & & ! post . get ( " query " , " " ) . equalsIgnoreCase ( post . get ( " former " , " " ) ) ; //new search term
2011-06-13 23:44:03 +02:00
2010-12-02 13:19:59 +01:00
int itemsPerPage = Math . min ( ( authenticated ) ? ( snippetFetchStrategy ! = null & & snippetFetchStrategy . isAllowedToFetchOnline ( ) ? 100 : 1000 ) : ( snippetFetchStrategy ! = null & & snippetFetchStrategy . isAllowedToFetchOnline ( ) ? 20 : 500 ) , post . getInt ( " maximumRecords " , post . getInt ( " count " , 10 ) ) ) ; // SRU syntax with old property as alternative
2009-01-12 15:59:27 +01:00
int offset = ( newsearch ) ? 0 : post . getInt ( " startRecord " , post . getInt ( " offset " , 0 ) ) ;
2011-06-13 23:44:03 +02:00
2011-03-08 23:37:17 +01:00
final int newcount ;
2011-04-02 01:32:40 +02:00
if ( authenticated & & ( newcount = post . getInt ( " count " , 0 ) ) > 0 ) {
sb . setConfig ( SwitchboardConstants . SEARCH_ITEMS , newcount ) ;
} // set new default maximumRecords if search with "more options"
2011-06-13 23:44:03 +02:00
2010-08-10 01:41:17 +02:00
boolean global = post . get ( " resource " , " local " ) . equals ( " global " ) & & sb . peers . sizeConnected ( ) > 0 ;
2011-06-13 23:44:03 +02:00
final boolean indexof = ( post ! = null & & post . get ( " indexof " , " " ) . equals ( " on " ) ) ;
2011-03-08 23:37:17 +01:00
final String originalUrlMask ;
2009-01-28 20:37:24 +01:00
if ( post . containsKey ( " urlmask " ) & & post . get ( " urlmask " ) . equals ( " no " ) ) { // option search all
originalUrlMask = " .* " ;
} else if ( ! newsearch & & post . containsKey ( " urlmaskfilter " ) ) {
originalUrlMask = post . get ( " urlmaskfilter " , " .* " ) ;
} else {
originalUrlMask = " .* " ;
}
2009-11-09 20:14:51 +01:00
String prefermask = ( post = = null ) ? " " : post . get ( " prefermaskfilter " , " " ) ;
2011-03-23 01:48:19 +01:00
if ( ! prefermask . isEmpty ( ) & & prefermask . indexOf ( " .* " ) < 0 ) {
2011-03-08 23:37:17 +01:00
prefermask = " .* " + prefermask + " .* " ;
}
2006-09-19 12:44:45 +02:00
2011-03-23 01:48:19 +01:00
Bitfield constraint = ( post ! = null & & post . containsKey ( " constraint " ) & & ! post . get ( " constraint " , " " ) . isEmpty ( ) ) ? new Bitfield ( 4 , post . get ( " constraint " , " ______ " ) ) : null ;
2006-11-23 03:16:30 +01:00
if ( indexof ) {
2009-01-30 16:33:00 +01:00
constraint = new Bitfield ( 4 ) ;
2009-04-03 15:23:45 +02:00
constraint . set ( Condenser . flag_cat_indexof , true ) ;
2006-11-23 03:16:30 +01:00
}
2011-06-13 23:44:03 +02:00
2006-12-01 03:45:49 +01:00
// SEARCH
2010-02-09 18:14:16 +01:00
final boolean indexReceiveGranted = sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_ALLOW , true ) | |
2011-03-08 23:37:17 +01:00
sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_AUTODISABLED , true ) ;
2008-09-24 01:30:25 +02:00
global = global & & indexReceiveGranted ; // if the user does not want indexes from remote peers, it cannot be a global search
2011-06-13 23:44:03 +02:00
2007-04-26 11:51:51 +02:00
final boolean clustersearch = sb . isRobinsonMode ( ) & &
2011-03-08 23:37:17 +01:00
( sb . getConfig ( " cluster.mode " , " " ) . equals ( " privatecluster " ) | |
sb . getConfig ( " cluster.mode " , " " ) . equals ( " publiccluster " ) ) ;
2011-04-02 01:32:40 +02:00
if ( clustersearch ) {
global = true ;
} // switches search on, but search target is limited to cluster nodes
2011-06-13 23:44:03 +02:00
2010-02-24 14:53:55 +01:00
// increase search statistic counter
if ( ! global ) {
// we count only searches on the local peer here, because global searches
// are counted on the target peer to preserve privacy of the searcher
if ( authenticated ) {
// local or authenticated search requests are counted separately
// because they are not part of a publicly available peer statistic
sb . searchQueriesRobinsonFromLocal + + ;
} else {
// robinson-searches from non-authenticated requests are public
// and may be part of the publicly available statistic
sb . searchQueriesRobinsonFromRemote + + ;
}
}
2011-06-13 23:44:03 +02:00
2006-12-07 03:40:57 +01:00
// find search domain
2010-05-12 02:48:24 +02:00
final ContentDomain contentdom = ContentDomain . contentdomParser ( post = = null ? " text " : post . get ( " contentdom " , " text " ) ) ;
2011-06-13 23:44:03 +02:00
2006-12-12 03:09:25 +01:00
// patch until better search profiles are available
2011-04-02 01:32:40 +02:00
if ( ( contentdom ! = ContentDomain . TEXT ) & & ( itemsPerPage < = 32 ) ) {
itemsPerPage = 64 ;
}
2011-06-13 23:44:03 +02:00
2008-05-17 02:11:35 +02:00
// check the search tracker
TreeSet < Long > trackerHandles = sb . localSearchTracker . get ( client ) ;
2011-04-02 01:32:40 +02:00
if ( trackerHandles = = null ) {
trackerHandles = new TreeSet < Long > ( ) ;
}
2008-05-17 02:11:35 +02:00
boolean block = false ;
2009-10-11 02:12:19 +02:00
if ( Domains . matchesList ( client , sb . networkBlacklist ) ) {
2010-05-12 02:48:24 +02:00
global = false ;
2011-04-02 01:32:40 +02:00
if ( snippetFetchStrategy ! = null ) {
snippetFetchStrategy = null ;
}
2009-01-12 11:55:48 +01:00
block = true ;
2010-09-22 22:50:02 +02:00
Log . logWarning ( " LOCAL_SEARCH " , " ACCESS CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search " ) ;
2009-10-11 02:12:19 +02:00
} else if ( Domains . matchesList ( client , sb . networkWhitelist ) ) {
2010-09-22 22:50:02 +02:00
Log . logInfo ( " LOCAL_SEARCH " , " ACCESS CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions " ) ;
} else if ( ! authenticated & & ! localhostAccess ) {
2008-06-11 11:54:58 +02:00
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
2009-06-16 12:37:13 +02:00
synchronized ( trackerHandles ) {
2011-06-13 23:44:03 +02:00
final int accInThreeSeconds = trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 3000 ) ) . size ( ) ;
final int accInOneMinute = trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 60000 ) ) . size ( ) ;
final int accInTenMinutes = trackerHandles . tailSet ( Long . valueOf ( System . currentTimeMillis ( ) - 600000 ) ) . size ( ) ;
2010-09-22 22:50:02 +02:00
// protection against excessive YaCy network load: switches off global (remote) search
if ( global ) {
2011-02-21 16:24:09 +01:00
if ( accInTenMinutes > = 60 | | accInOneMinute > = 6 | | accInThreeSeconds > = 1 ) {
2010-09-22 22:50:02 +02:00
global = false ;
2011-02-21 16:24:09 +01:00
Log . logWarning ( " LOCAL_SEARCH " , " ACCESS CONTROL: CLIENT FROM " + client + " : " + accInThreeSeconds + " /3s, " + accInOneMinute + " /60s, " + accInTenMinutes + " /600s, " + " requests, disallowed global search " ) ;
2010-09-22 22:50:02 +02:00
}
}
// protection against too many remote server snippet loads (protects traffic on server)
2010-10-09 10:55:57 +02:00
if ( snippetFetchStrategy ! = null & & snippetFetchStrategy . isAllowedToFetchOnline ( ) ) {
2010-09-22 22:50:02 +02:00
if ( accInTenMinutes > = 20 | | accInOneMinute > = 4 | | accInThreeSeconds > = 1 ) {
2011-02-21 16:11:03 +01:00
snippetFetchStrategy = CacheStrategy . CACHEONLY ;
2011-02-21 16:24:09 +01:00
Log . logWarning ( " LOCAL_SEARCH " , " ACCESS CONTROL: CLIENT FROM " + client + " : " + accInThreeSeconds + " /3s, " + accInOneMinute + " /60s, " + accInTenMinutes + " /600s, " + " requests, disallowed remote snippet loading " ) ;
2010-09-22 22:50:02 +02:00
}
}
// general load protection
2011-02-21 16:24:09 +01:00
if ( accInTenMinutes > = 3000 | | accInOneMinute > = 600 | | accInThreeSeconds > = 60 ) {
2009-06-16 12:37:13 +02:00
block = true ;
2011-02-21 16:24:09 +01:00
Log . logWarning ( " LOCAL_SEARCH " , " ACCESS CONTROL: CLIENT FROM " + client + " : " + accInThreeSeconds + " /3s, " + accInOneMinute + " /60s, " + accInTenMinutes + " /600s, " + " requests, disallowed search " ) ;
2009-06-16 12:37:13 +02:00
}
2008-09-03 17:55:25 +02:00
}
2008-06-11 11:54:58 +02:00
}
2011-06-13 23:44:03 +02:00
2008-06-06 18:01:27 +02:00
if ( ( ! block ) & & ( post = = null | | post . get ( " cat " , " href " ) . equals ( " href " ) ) ) {
2011-03-08 23:37:17 +01:00
String urlmask = null ;
2011-06-13 23:44:03 +02:00
2008-06-16 23:39:58 +02:00
// check available memory and clean up if necessary
2009-01-30 16:33:00 +01:00
if ( ! MemoryControl . request ( 8000000L , false ) ) {
2009-10-09 16:44:20 +02:00
indexSegment . urlMetadata ( ) . clearCache ( ) ;
2009-08-24 17:24:02 +02:00
SearchEventCache . cleanupEvents ( true ) ;
2008-06-16 23:39:58 +02:00
}
2011-06-13 23:44:03 +02:00
2009-06-16 23:45:40 +02:00
final RankingProfile ranking = sb . getRanking ( ) ;
2009-04-28 19:12:31 +02:00
2010-11-29 19:08:20 +01:00
if ( querystring . indexOf ( " /near " ) > = 0 ) {
querystring = querystring . replace ( " /near " , " " ) ;
2009-06-16 23:45:40 +02:00
ranking . coeff_worddistance = RankingProfile . COEFF_MAX ;
2008-01-08 21:12:31 +01:00
}
2010-11-29 19:08:20 +01:00
if ( querystring . indexOf ( " /date " ) > = 0 ) {
2011-03-31 11:41:30 +02:00
querystring = querystring . replace ( " /date " , " " ) ;
2009-06-16 23:45:40 +02:00
ranking . coeff_date = RankingProfile . COEFF_MAX ;
2008-05-18 23:29:43 +02:00
}
2011-03-31 11:41:30 +02:00
if ( querystring . indexOf ( " /location " ) > = 0 ) {
querystring = querystring . replace ( " /location " , " " ) ;
2011-04-02 01:32:40 +02:00
if ( constraint = = null ) {
constraint = new Bitfield ( 4 ) ;
}
2011-03-31 11:41:30 +02:00
constraint . set ( Condenser . flag_cat_haslocation , true ) ;
}
2011-06-13 23:44:03 +02:00
final int lrp = querystring . indexOf ( " /language/ " ) ;
2008-09-21 09:28:57 +02:00
String lr = " " ;
if ( lrp > = 0 ) {
2011-03-08 23:37:17 +01:00
if ( querystring . length ( ) > = ( lrp + 11 ) ) {
lr = querystring . substring ( lrp + 9 , lrp + 11 ) ;
}
2010-11-29 19:08:20 +01:00
querystring = querystring . replace ( " /language/ " + lr , " " ) ;
2009-04-28 20:59:56 +02:00
lr = lr . toLowerCase ( ) ;
2008-09-21 09:28:57 +02:00
}
2011-03-08 23:37:17 +01:00
final int inurl = querystring . indexOf ( " inurl: " ) ;
2009-01-08 19:59:29 +01:00
if ( inurl > = 0 ) {
int ftb = querystring . indexOf ( ' ' , inurl ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) ;
}
2011-06-13 23:44:03 +02:00
final String urlstr = querystring . substring ( inurl + 6 , ftb ) ;
2009-04-28 19:12:31 +02:00
querystring = querystring . replace ( " inurl: " + urlstr , " " ) ;
2011-04-02 01:32:40 +02:00
if ( ! urlstr . isEmpty ( ) ) {
urlmask = " .* " + urlstr + " .* " ;
}
2009-01-08 19:59:29 +01:00
}
2011-03-08 23:37:17 +01:00
final int filetype = querystring . indexOf ( " filetype: " ) ;
2008-12-29 18:57:04 +01:00
if ( filetype > = 0 ) {
int ftb = querystring . indexOf ( ' ' , filetype ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) ;
}
2008-12-29 18:57:04 +01:00
String ft = querystring . substring ( filetype + 9 , ftb ) ;
2009-04-28 19:12:31 +02:00
querystring = querystring . replace ( " filetype: " + ft , " " ) ;
2011-04-02 01:32:40 +02:00
while ( ! ft . isEmpty ( ) & & ft . charAt ( 0 ) = = '.' ) ft = ft . substring ( 1 ) ;
if ( ! ft . isEmpty ( ) ) {
2009-01-25 23:16:49 +01:00
if ( urlmask = = null ) {
urlmask = " .* \\ . " + ft ;
} else {
urlmask = urlmask + " .* \\ . " + ft ;
}
}
2009-01-04 15:58:32 +01:00
}
2009-06-22 14:25:18 +02:00
String tenant = null ;
2009-04-02 15:26:47 +02:00
if ( post . containsKey ( " tenant " ) ) {
2009-06-22 14:25:18 +02:00
tenant = post . get ( " tenant " ) ;
2011-04-02 01:32:40 +02:00
if ( tenant ! = null & & tenant . isEmpty ( ) ) {
tenant = null ;
}
2009-06-23 00:31:29 +02:00
if ( tenant ! = null ) {
2011-04-02 01:32:40 +02:00
if ( urlmask = = null ) {
urlmask = " .* " + tenant + " .* " ;
} else urlmask = " .* " + tenant + urlmask ;
2009-06-23 00:31:29 +02:00
}
2009-04-02 15:26:47 +02:00
}
2011-06-13 23:44:03 +02:00
final int site = querystring . indexOf ( " site: " ) ;
2009-04-02 15:26:47 +02:00
String sitehash = null ;
2010-06-23 13:19:32 +02:00
String sitehost = null ;
2009-01-04 15:58:32 +01:00
if ( site > = 0 ) {
int ftb = querystring . indexOf ( ' ' , site ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) ;
}
2010-06-23 13:19:32 +02:00
sitehost = querystring . substring ( site + 5 , ftb ) ;
querystring = querystring . replace ( " site: " + sitehost , " " ) ;
2011-04-02 01:32:40 +02:00
while ( sitehost . length ( ) > 0 & & sitehost . charAt ( 0 ) = = '.' ) {
sitehost = sitehost . substring ( 1 ) ;
}
while ( sitehost . endsWith ( " . " ) ) {
sitehost = sitehost . substring ( 0 , sitehost . length ( ) - 1 ) ;
}
2011-05-30 06:19:20 +02:00
sitehash = DigestURI . hosthash ( sitehost ) ;
2009-04-28 19:12:31 +02:00
}
2011-06-13 23:44:03 +02:00
2011-03-08 23:37:17 +01:00
final int heuristicScroogle = querystring . indexOf ( " heuristic:scroogle " ) ;
2010-11-29 19:08:20 +01:00
if ( heuristicScroogle > = 0 ) {
2010-06-25 18:44:57 +02:00
querystring = querystring . replace ( " heuristic:scroogle " , " " ) ;
}
2011-06-13 23:44:03 +02:00
2011-03-08 23:37:17 +01:00
final int heuristicBlekko = querystring . indexOf ( " heuristic:blekko " ) ;
2010-11-29 19:08:20 +01:00
if ( heuristicBlekko > = 0 ) {
querystring = querystring . replace ( " heuristic:blekko " , " " ) ;
}
2011-06-13 23:44:03 +02:00
2011-03-08 23:37:17 +01:00
final int authori = querystring . indexOf ( " author: " ) ;
2009-06-09 01:30:12 +02:00
String authorhash = null ;
if ( authori > = 0 ) {
// check if the author was given with single quotes or without
2011-03-08 23:37:17 +01:00
final boolean quotes = ( querystring . charAt ( authori + 7 ) = = ( char ) 39 ) ;
2009-06-09 01:30:12 +02:00
String author ;
if ( quotes ) {
2011-03-08 23:37:17 +01:00
int ftb = querystring . indexOf ( ( char ) 39 , authori + 8 ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) + 1 ;
}
2009-06-09 01:30:12 +02:00
author = querystring . substring ( authori + 8 , ftb ) ;
querystring = querystring . replace ( " author:' " + author + " ' " , " " ) ;
} else {
2011-03-08 23:37:17 +01:00
int ftb = querystring . indexOf ( ' ' , authori ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) ;
}
2011-03-08 23:37:17 +01:00
author = querystring . substring ( authori + 7 , ftb ) ;
2009-06-09 01:30:12 +02:00
querystring = querystring . replace ( " author: " + author , " " ) ;
}
2011-05-27 10:24:54 +02:00
authorhash = ASCII . String ( Word . word2hash ( author ) ) ;
2009-06-09 01:30:12 +02:00
}
2011-03-08 23:37:17 +01:00
final int tld = querystring . indexOf ( " tld: " ) ;
2009-04-28 19:12:31 +02:00
if ( tld > = 0 ) {
int ftb = querystring . indexOf ( ' ' , tld ) ;
2011-04-02 01:32:40 +02:00
if ( ftb = = - 1 ) {
ftb = querystring . length ( ) ;
}
2009-04-28 19:12:31 +02:00
String domain = querystring . substring ( tld + 4 , ftb ) ;
querystring = querystring . replace ( " tld: " + domain , " " ) ;
2011-04-02 01:32:40 +02:00
while ( domain . length ( ) > 0 & & domain . charAt ( 0 ) = = '.' ) {
domain = domain . substring ( 1 ) ;
}
if ( domain . indexOf ( '.' ) < 0 ) {
domain = " \\ . " + domain ;
} // is tld
2009-01-25 23:16:49 +01:00
if ( domain . length ( ) > 0 ) {
2011-04-02 01:32:40 +02:00
urlmask = " [a-zA-Z]*://[^/]* " + domain + " /.* " + ( ( urlmask ! = null ) ? urlmask : " " ) ;
2009-01-25 23:16:49 +01:00
}
2009-01-12 15:59:27 +01:00
}
2011-04-02 01:32:40 +02:00
if ( urlmask = = null | | urlmask . isEmpty ( ) ) {
urlmask = originalUrlMask ;
} //if no urlmask was given
2011-06-13 23:44:03 +02:00
2008-09-21 09:28:57 +02:00
// read the language from the language-restrict option 'lr'
// if none is given, use the user agent or the system language as default
String language = ( post = = null ) ? lr : post . get ( " lr " , lr ) ;
2011-04-02 01:32:40 +02:00
if ( language . startsWith ( " lang_ " ) ) {
language = language . substring ( 5 ) ;
}
2009-10-11 02:12:19 +02:00
if ( ! ISO639 . exists ( language ) ) {
2008-09-21 09:28:57 +02:00
// find out the language of the user by reading the Accept-Language request header
2009-07-19 22:37:44 +02:00
String agent = header . get ( HeaderFramework . ACCEPT_LANGUAGE ) ;
2011-04-02 01:32:40 +02:00
if ( agent = = null ) {
agent = System . getProperty ( " user.language " ) ;
}
2009-10-11 02:12:19 +02:00
language = ( agent = = null ) ? " en " : ISO639 . userAgentLanguageDetection ( agent ) ;
2011-04-02 01:32:40 +02:00
if ( language = = null ) {
language = " en " ;
}
2008-09-21 09:28:57 +02:00
}
2011-06-13 23:44:03 +02:00
2009-06-07 23:48:01 +02:00
// navigation
2011-03-21 08:50:34 +01:00
final String navigation = ( post = = null ) ? sb . getConfig ( " search.navigation " , " all " ) : post . get ( " nav " , " " ) ;
2011-06-13 23:44:03 +02:00
2009-06-07 23:48:01 +02:00
// the query
2009-07-09 00:14:57 +02:00
final TreeSet < String > [ ] query = QueryParams . cleanQuery ( querystring . trim ( ) ) ; // converts also umlaute
2011-06-13 23:44:03 +02:00
final int maxDistance = ( querystring . indexOf ( '"' ) > = 0 ) ? query . length - 1 : Integer . MAX_VALUE ;
2008-09-03 01:49:48 +02:00
2007-09-06 15:26:38 +02:00
// filter out stopwords
2010-11-28 03:57:31 +01:00
final SortedSet < String > filtered = SetTools . joinConstructive ( query [ 0 ] , Switchboard . stopwords ) ;
2009-12-02 01:37:59 +01:00
if ( ! filtered . isEmpty ( ) ) {
2009-07-19 22:37:44 +02:00
SetTools . excludeDestructive ( query [ 0 ] , Switchboard . stopwords ) ;
2006-09-19 12:44:45 +02:00
}
2006-12-20 16:44:29 +01:00
2007-09-06 15:26:38 +02:00
// if a minus-button was hit, remove a special reference first
2011-04-02 01:32:40 +02:00
if ( post ! = null & & post . containsKey ( " deleteref " ) ) {
try {
if ( ! sb . verifyAuthentication ( header , true ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
2007-09-06 15:26:38 +02:00
2011-04-02 01:32:40 +02:00
// delete the index entry locally
final String delHash = post . get ( " deleteref " , " " ) ; // urlhash
indexSegment . termIndex ( ) . remove ( Word . words2hashesHandles ( query [ 0 ] ) , delHash . getBytes ( ) ) ;
// make new news message with negative voting
if ( ! sb . isRobinsonMode ( ) ) {
final Map < String , String > map = new HashMap < String , String > ( ) ;
map . put ( " urlhash " , delHash ) ;
map . put ( " vote " , " negative " ) ;
map . put ( " refid " , " " ) ;
sb . peers . newsPool . publishMyNews ( sb . peers . mySeed ( ) , yacyNewsPool . CATEGORY_SURFTIPP_VOTE_ADD , map ) ;
}
2011-06-13 23:44:03 +02:00
} catch ( final IOException e ) {
2011-04-02 01:32:40 +02:00
Log . logException ( e ) ;
2011-01-12 01:00:14 +01:00
}
2006-12-20 16:44:29 +01:00
}
2007-09-06 15:26:38 +02:00
// if a plus-button was hit, create new voting message
2008-06-06 18:01:27 +02:00
if ( post ! = null & & post . containsKey ( " recommendref " ) ) {
2007-09-06 15:26:38 +02:00
if ( ! sb . verifyAuthentication ( header , true ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
final String recommendHash = post . get ( " recommendref " , " " ) ; // urlhash
2011-05-13 08:21:40 +02:00
final URIMetadataRow urlentry = indexSegment . urlMetadata ( ) . load ( UTF8 . getBytes ( recommendHash ) ) ;
2007-09-06 15:26:38 +02:00
if ( urlentry ! = null ) {
2009-10-11 02:12:19 +02:00
final URIMetadataRow . Components metadata = urlentry . metadata ( ) ;
2010-06-29 21:20:45 +02:00
Document [ ] documents = null ;
2010-06-22 14:28:53 +02:00
try {
2011-06-13 23:44:03 +02:00
documents = sb . loader . loadDocuments ( sb . loader . request ( metadata . url ( ) , true , false ) , CacheStrategy . IFEXIST , 5000 , Long . MAX_VALUE ) ;
} catch ( final IOException e ) {
} catch ( final Parser . Failure e ) {
2010-06-22 14:28:53 +02:00
}
2010-06-29 21:20:45 +02:00
if ( documents ! = null ) {
2007-09-06 15:26:38 +02:00
// create a news message
2011-03-08 23:37:17 +01:00
final Map < String , String > map = new HashMap < String , String > ( ) ;
2009-03-02 12:04:13 +01:00
map . put ( " url " , metadata . url ( ) . toNormalform ( false , true ) . replace ( ',' , '|' ) ) ;
map . put ( " title " , metadata . dc_title ( ) . replace ( ',' , ' ' ) ) ;
2010-06-29 21:20:45 +02:00
map . put ( " description " , documents [ 0 ] . dc_title ( ) . replace ( ',' , ' ' ) ) ;
map . put ( " author " , documents [ 0 ] . dc_creator ( ) ) ;
map . put ( " tags " , documents [ 0 ] . dc_subject ( ' ' ) ) ;
2010-06-15 12:43:47 +02:00
sb . peers . newsPool . publishMyNews ( sb . peers . mySeed ( ) , yacyNewsPool . CATEGORY_SURFTIPP_ADD , map ) ;
2010-06-29 21:20:45 +02:00
documents [ 0 ] . close ( ) ;
2007-09-06 15:26:38 +02:00
}
2006-09-19 12:44:45 +02:00
}
}
2007-09-06 15:26:38 +02:00
// prepare search properties
2011-04-02 01:32:40 +02:00
final boolean globalsearch = ( global ) & & indexReceiveGranted ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
// do the search
2010-04-15 15:22:59 +02:00
final HandleSet queryHashes = Word . words2hashesHandles ( query [ 0 ] ) ;
2011-05-05 02:25:14 +02:00
final Pattern snippetPattern = QueryParams . stringSearchPattern ( originalquerystring ) ;
2011-06-13 23:44:03 +02:00
2011-03-23 01:48:19 +01:00
// check filters
try {
Pattern . compile ( urlmask ) ;
} catch ( final PatternSyntaxException ex ) {
Log . logWarning ( " SEARCH " , " Illegal URL mask, not a valid regex: " + urlmask ) ;
prop . put ( " urlmaskerror " , 1 ) ;
prop . putHTML ( " urlmaskerror_urlmask " , urlmask ) ;
urlmask = " .* " ;
}
try {
Pattern . compile ( prefermask ) ;
} catch ( final PatternSyntaxException ex ) {
Log . logWarning ( " SEARCH " , " Illegal prefer mask, not a valid regex: " + prefermask ) ;
prop . put ( " prefermaskerror " , 1 ) ;
prop . putHTML ( " prefermaskerror_prefermask " , prefermask ) ;
prefermask = " " ;
}
2009-07-09 00:14:57 +02:00
final QueryParams theQuery = new QueryParams (
2011-03-08 23:37:17 +01:00
originalquerystring ,
queryHashes ,
Word . words2hashesHandles ( query [ 1 ] ) ,
Word . words2hashesHandles ( query [ 2 ] ) ,
2011-05-05 02:25:14 +02:00
snippetPattern ,
2011-03-08 23:37:17 +01:00
tenant ,
2006-09-19 12:44:45 +02:00
maxDistance ,
prefermask ,
2010-01-21 23:03:02 +01:00
contentdom ,
2008-09-21 02:04:42 +02:00
language ,
2009-06-07 23:48:01 +02:00
navigation ,
2010-06-21 16:54:54 +02:00
snippetFetchStrategy ,
2007-10-02 23:43:05 +02:00
itemsPerPage ,
2007-09-04 01:43:55 +02:00
offset ,
2006-09-19 12:44:45 +02:00
urlmask ,
2009-07-09 00:14:57 +02:00
( clustersearch & & globalsearch ) ? QueryParams . SEARCHDOM_CLUSTERALL :
( ( globalsearch ) ? QueryParams . SEARCHDOM_GLOBALDHT : QueryParams . SEARCHDOM_LOCAL ) ,
2006-12-20 16:44:29 +01:00
20 ,
2007-11-16 15:48:09 +01:00
constraint ,
2008-02-18 00:35:48 +01:00
true ,
2009-04-02 15:26:47 +02:00
sitehash ,
2009-06-09 01:30:12 +02:00
authorhash ,
2009-10-11 02:12:19 +02:00
DigestURI . TLD_any_zone_filter ,
2008-05-23 11:45:33 +02:00
client ,
2009-11-24 12:13:11 +01:00
authenticated ,
indexSegment ,
2010-10-18 10:09:59 +02:00
ranking ,
2011-01-22 10:46:00 +01:00
header . get ( RequestHeader . USER_AGENT , " " ) ,
sb . getConfigBool ( SwitchboardConstants . NETWORK_SEARCHVERIFY , false ) & & sb . peers . mySeed ( ) . getFlagAcceptRemoteIndex ( ) ) ;
2010-09-14 11:06:27 +02:00
EventTracker . delete ( EventTracker . EClass . SEARCH ) ;
2010-09-13 11:33:04 +02:00
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . searchEvent ( theQuery . id ( true ) , SearchEvent . Type . INITIALIZATION , " " , 0 , 0 ) , false ) ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
// tell all threads to do nothing for a specific time
2010-04-14 01:29:55 +02:00
sb . intermissionAllThreads ( 3000 ) ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
// filter out words that appear in bluelist
2009-07-19 22:37:44 +02:00
theQuery . filterOut ( Switchboard . blueList ) ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
// log
2009-07-09 00:14:57 +02:00
Log . logInfo ( " LOCAL_SEARCH " , " INIT WORD SEARCH: " + theQuery . queryString + " : " + QueryParams . hashSet2hashString ( theQuery . queryHashes ) + " - " + theQuery . neededResults ( ) + " links to be computed, " + theQuery . displayResults ( ) + " lines to be displayed " ) ;
2010-06-29 21:20:45 +02:00
yacyChannel . channels ( yacyChannel . LOCALSEARCH ) . addMessage ( new RSSMessage ( " Local Search Request " , theQuery . queryString , " " ) ) ;
2008-08-02 14:12:04 +02:00
final long timestamp = System . currentTimeMillis ( ) ;
2007-09-06 15:26:38 +02:00
// create a new search event
2009-08-24 17:24:02 +02:00
if ( SearchEventCache . getEvent ( theQuery . id ( false ) ) = = null ) {
2011-06-13 23:44:03 +02:00
theQuery . setOffset ( 0 ) ; // in case that this is a new search, always start without a offset
2007-09-06 15:26:38 +02:00
offset = 0 ;
}
2011-03-04 14:44:00 +01:00
final SearchEvent theSearch = SearchEventCache . getEvent (
2011-03-08 23:37:17 +01:00
theQuery , sb . peers , sb . tables , ( sb . isRobinsonMode ( ) ) ? sb . clusterhashes : null , false , sb . loader ,
( int ) sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXCOUNT_USER , sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXCOUNT_DEFAULT , 10 ) ) ,
sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXTIME_USER , sb . getConfigLong ( SwitchboardConstants . REMOTESEARCH_MAXTIME_DEFAULT , 3000 ) ) ,
( int ) sb . getConfigLong ( SwitchboardConstants . DHT_BURST_ROBINSON , 0 ) ,
( int ) sb . getConfigLong ( SwitchboardConstants . DHT_BURST_MULTIWORD , 0 ) ) ;
try {
Thread . sleep ( global ? 100 : 10 ) ;
2011-06-13 23:44:03 +02:00
} catch ( final InterruptedException e1 ) { } // wait a little time to get first results in the search
2010-11-29 19:08:20 +01:00
if ( offset = = 0 ) {
2011-04-02 01:32:40 +02:00
if ( sitehost ! = null & & sb . getConfigBool ( " heuristic.site " , false ) & & authenticated ) {
sb . heuristicSite ( theSearch , sitehost ) ;
}
if ( ( heuristicScroogle > = 0 | | sb . getConfigBool ( " heuristic.scroogle " , false ) ) & & authenticated ) {
sb . heuristicScroogle ( theSearch ) ;
}
if ( ( heuristicBlekko > = 0 | | sb . getConfigBool ( " heuristic.blekko " , false ) ) & & authenticated ) {
sb . heuristicRSS ( " http://blekko.com/ws/$+/rss " , theSearch , " blekko " ) ;
}
2010-11-29 19:08:20 +01:00
}
2007-08-15 13:36:59 +02:00
2007-09-06 15:26:38 +02:00
// log
2009-01-31 00:33:47 +01:00
Log . logInfo ( " LOCAL_SEARCH " , " EXIT WORD SEARCH: " + theQuery . queryString + " - " +
2010-12-02 13:19:59 +01:00
" local-unfiltered( " + theSearch . getRankingResult ( ) . getLocalIndexCount ( ) + " ), " +
" -local_miss( " + theSearch . getRankingResult ( ) . getMissCount ( ) + " ), " +
2011-05-07 01:04:27 +02:00
" -local_sortout( " + theSearch . getRankingResult ( ) . getSortOutCount ( ) + " ), " +
2010-12-02 13:19:59 +01:00
" remote( " + theSearch . getRankingResult ( ) . getRemoteResourceSize ( ) + " ) links found, " +
2008-07-15 19:35:02 +02:00
( System . currentTimeMillis ( ) - timestamp ) + " ms " ) ;
2007-08-28 14:15:46 +02:00
2007-09-06 15:26:38 +02:00
// prepare search statistics
2011-05-07 01:04:27 +02:00
theQuery . resultcount = theSearch . getRankingResult ( ) . getLocalIndexCount ( ) - theSearch . getRankingResult ( ) . getMissCount ( ) - theSearch . getRankingResult ( ) . getSortOutCount ( ) + theSearch . getRankingResult ( ) . getRemoteIndexCount ( ) ;
2008-02-18 00:35:48 +01:00
theQuery . searchtime = System . currentTimeMillis ( ) - timestamp ;
2009-08-26 17:59:55 +02:00
theQuery . urlretrievaltime = theSearch . result ( ) . getURLRetrievalTime ( ) ;
theQuery . snippetcomputationtime = theSearch . result ( ) . getSnippetComputationTime ( ) ;
2010-12-29 02:54:27 +01:00
AccessTracker . add ( AccessTracker . Location . local , theQuery ) ;
2011-06-13 23:44:03 +02:00
2009-06-12 22:36:03 +02:00
// check suggestions
2011-03-15 02:03:35 +01:00
final int meanMax = ( post ! = null ) ? post . getInt ( " meanCount " , 0 ) : 0 ;
2009-06-13 09:02:50 +02:00
prop . put ( " meanCount " , meanMax ) ;
2011-05-26 14:35:24 +02:00
if ( meanMax > 0 & & ! json & & ! rss ) {
2011-03-08 23:37:17 +01:00
final DidYouMean didYouMean = new DidYouMean ( indexSegment . termIndex ( ) , querystring ) ;
final Iterator < String > meanIt = didYouMean . getSuggestions ( 100 , 5 ) . iterator ( ) ;
2009-06-13 08:20:05 +02:00
int meanCount = 0 ;
String suggestion ;
2011-03-15 02:03:35 +01:00
while ( meanCount < meanMax & & meanIt . hasNext ( ) ) {
2011-03-08 23:37:17 +01:00
suggestion = meanIt . next ( ) ;
prop . put ( " didYouMean_suggestions_ " + meanCount + " _word " , suggestion ) ;
prop . put ( " didYouMean_suggestions_ " + meanCount + " _url " ,
2011-06-13 23:44:03 +02:00
QueryParams . navurl ( " html " , 0 , theQuery , suggestion , originalUrlMask . toString ( ) , theQuery . navigators ) . toString ( )
2009-06-13 08:20:05 +02:00
) ;
2011-03-08 23:37:17 +01:00
prop . put ( " didYouMean_suggestions_ " + meanCount + " _sep " , " | " ) ;
meanCount + + ;
2009-06-13 08:20:05 +02:00
}
prop . put ( " didYouMean_suggestions_ " + ( meanCount - 1 ) + " _sep " , " " ) ;
2009-06-13 09:02:50 +02:00
prop . put ( " didYouMean " , meanCount > 0 ? 1 : 0 ) ;
2009-06-13 08:20:05 +02:00
prop . put ( " didYouMean_suggestions " , meanCount ) ;
} else {
prop . put ( " didYouMean " , 0 ) ;
2009-06-12 22:36:03 +02:00
}
2011-06-13 23:44:03 +02:00
2009-09-04 16:32:36 +02:00
// find geographic info
2011-03-15 02:03:35 +01:00
final SortedSet < Location > coordinates = LibraryProvider . geoLoc . find ( originalquerystring , false ) ;
2009-12-02 01:37:59 +01:00
if ( coordinates = = null | | coordinates . isEmpty ( ) | | offset > 0 ) {
2009-09-04 16:32:36 +02:00
prop . put ( " geoinfo " , " 0 " ) ;
} else {
int i = 0 ;
2011-03-15 02:03:35 +01:00
for ( final Location c : coordinates ) {
2011-02-02 01:06:29 +01:00
prop . put ( " geoinfo_loc_ " + i + " _lon " , Math . round ( c . lon ( ) * 10000 . 0f ) / 10000 . 0f ) ;
prop . put ( " geoinfo_loc_ " + i + " _lat " , Math . round ( c . lat ( ) * 10000 . 0f ) / 10000 . 0f ) ;
2009-09-08 12:18:03 +02:00
prop . put ( " geoinfo_loc_ " + i + " _name " , c . getName ( ) ) ;
2009-09-04 16:32:36 +02:00
i + + ;
2010-05-21 10:18:04 +02:00
if ( i > = 10 ) break ;
2009-09-04 16:32:36 +02:00
}
prop . put ( " geoinfo_loc " , i ) ;
prop . put ( " geoinfo " , " 1 " ) ;
}
2011-06-13 23:44:03 +02:00
2008-05-17 02:11:35 +02:00
// update the search tracker
2009-06-11 01:02:42 +02:00
try {
2009-06-16 12:37:13 +02:00
synchronized ( trackerHandles ) {
2011-03-08 23:37:17 +01:00
trackerHandles . add ( theQuery . time ) ;
while ( trackerHandles . size ( ) > 600 ) {
if ( ! trackerHandles . remove ( trackerHandles . first ( ) ) ) break ;
}
2009-06-16 12:37:13 +02:00
}
2010-05-12 01:06:39 +02:00
sb . localSearchTracker . put ( client , trackerHandles ) ;
2011-05-26 16:35:32 +02:00
if ( sb . localSearchTracker . size ( ) > 100 ) {
2011-04-02 01:32:40 +02:00
sb . localSearchTracker . remove ( sb . localSearchTracker . keys ( ) . nextElement ( ) ) ;
}
2011-05-26 16:35:32 +02:00
if ( MemoryControl . shortStatus ( ) ) sb . localSearchTracker . clear ( ) ;
2011-06-13 23:44:03 +02:00
} catch ( final Exception e ) {
2009-11-05 21:28:37 +01:00
Log . logException ( e ) ;
2009-06-11 01:02:42 +02:00
}
2011-06-13 23:44:03 +02:00
2011-05-07 01:04:27 +02:00
final int indexcount = theSearch . getRankingResult ( ) . getLocalIndexCount ( ) - theSearch . getRankingResult ( ) . getMissCount ( ) - theSearch . getRankingResult ( ) . getSortOutCount ( ) + theSearch . getRankingResult ( ) . getRemoteIndexCount ( ) ;
2007-10-02 23:43:05 +02:00
prop . put ( " num-results_offset " , offset ) ;
2009-11-24 12:13:11 +01:00
prop . put ( " num-results_itemscount " , Formatter . number ( 0 , true ) ) ;
2007-10-02 23:43:05 +02:00
prop . put ( " num-results_itemsPerPage " , itemsPerPage ) ;
2010-12-02 13:19:59 +01:00
prop . put ( " num-results_totalcount " , Formatter . number ( indexcount , true ) ) ;
2008-01-30 22:58:30 +01:00
prop . put ( " num-results_globalresults " , ( globalsearch ) ? " 1 " : " 0 " ) ;
2010-01-13 01:04:37 +01:00
prop . put ( " num-results_globalresults_localResourceSize " , Formatter . number ( theSearch . getRankingResult ( ) . getLocalIndexCount ( ) , true ) ) ;
2010-12-02 13:19:59 +01:00
prop . put ( " num-results_globalresults_localMissCount " , Formatter . number ( theSearch . getRankingResult ( ) . getMissCount ( ) , true ) ) ;
2009-01-31 02:06:56 +01:00
prop . put ( " num-results_globalresults_remoteResourceSize " , Formatter . number ( theSearch . getRankingResult ( ) . getRemoteResourceSize ( ) , true ) ) ;
prop . put ( " num-results_globalresults_remoteIndexCount " , Formatter . number ( theSearch . getRankingResult ( ) . getRemoteIndexCount ( ) , true ) ) ;
prop . put ( " num-results_globalresults_remotePeerCount " , Formatter . number ( theSearch . getRankingResult ( ) . getRemotePeerCount ( ) , true ) ) ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
// compose page navigation
2008-12-04 13:54:16 +01:00
final StringBuilder resnav = new StringBuilder ( ) ;
2008-08-02 14:12:04 +02:00
final int thispage = offset / theQuery . displayResults ( ) ;
2009-05-15 01:11:10 +02:00
if ( thispage = = 0 ) {
2010-05-20 08:36:02 +02:00
resnav . append ( " <img src= \" env/grafics/navdl.gif \" alt= \" arrowleft \" width= \" 16 \" height= \" 16 \" /> " ) ;
2009-05-15 01:11:10 +02:00
} else {
2010-09-10 12:42:01 +02:00
resnav . append ( " <a id= \" prevpage \" href= \" " ) ;
2011-06-13 23:44:03 +02:00
resnav . append ( QueryParams . navurl ( " html " , thispage - 1 , theQuery , null , originalUrlMask , navigation ) . toString ( ) ) ;
2010-05-20 08:36:02 +02:00
resnav . append ( " \" ><img src= \" env/grafics/navdl.gif \" alt= \" arrowleft \" width= \" 16 \" height= \" 16 \" /></a> " ) ;
2007-09-04 01:43:55 +02:00
}
2010-12-06 00:54:00 +01:00
final int numberofpages = Math . min ( 10 , 1 + ( ( indexcount - 1 ) / theQuery . displayResults ( ) ) ) ;
2011-06-13 23:44:03 +02:00
2007-09-06 15:26:38 +02:00
for ( int i = 0 ; i < numberofpages ; i + + ) {
if ( i = = thispage ) {
2009-05-15 01:11:10 +02:00
resnav . append ( " <img src= \" env/grafics/navs " ) ;
resnav . append ( i + 1 ) ;
2010-05-20 08:36:02 +02:00
resnav . append ( " .gif \" alt= \" page " ) ;
2011-03-08 23:37:17 +01:00
resnav . append ( i + 1 ) ;
resnav . append ( " \" width= \" 16 \" height= \" 16 \" /> " ) ;
2007-09-06 15:26:38 +02:00
} else {
2009-06-04 12:54:49 +02:00
resnav . append ( " <a href= \" " ) ;
2011-06-13 23:44:03 +02:00
resnav . append ( QueryParams . navurl ( " html " , i , theQuery , null , originalUrlMask , navigation ) . toString ( ) ) ;
2009-06-04 12:54:49 +02:00
resnav . append ( " \" ><img src= \" env/grafics/navd " ) ;
2011-03-08 23:37:17 +01:00
resnav . append ( i + 1 ) ;
resnav . append ( " .gif \" alt= \" page " ) ;
resnav . append ( i + 1 ) ;
resnav . append ( " \" width= \" 16 \" height= \" 16 \" /></a> " ) ;
2007-09-06 15:26:38 +02:00
}
}
2009-05-15 01:11:10 +02:00
if ( thispage > = numberofpages ) {
2010-05-20 08:36:02 +02:00
resnav . append ( " <img src= \" env/grafics/navdr.gif \" alt= \" arrowright \" width= \" 16 \" height= \" 16 \" /> " ) ;
2009-05-15 01:11:10 +02:00
} else {
2010-09-10 12:42:01 +02:00
resnav . append ( " <a id= \" nextpage \" href= \" " ) ;
2011-06-13 23:44:03 +02:00
resnav . append ( QueryParams . navurl ( " html " , thispage + 1 , theQuery , null , originalUrlMask , navigation ) . toString ( ) ) ;
2010-05-20 08:36:02 +02:00
resnav . append ( " \" ><img src= \" env/grafics/navdr.gif \" alt= \" arrowright \" width= \" 16 \" height= \" 16 \" /></a> " ) ;
2007-09-06 15:26:38 +02:00
}
2011-03-08 23:37:17 +01:00
final String resnavs = resnav . toString ( ) ;
2009-11-24 12:13:11 +01:00
prop . put ( " num-results_resnav " , resnavs ) ;
2010-12-02 13:19:59 +01:00
prop . put ( " pageNavBottom " , ( indexcount - offset > 6 ) ? 1 : 0 ) ; // if there are more results than may fit on the page we add a navigation at the bottom
2009-11-24 12:13:11 +01:00
prop . put ( " pageNavBottom_resnav " , resnavs ) ;
2011-06-13 23:44:03 +02:00
2009-05-26 00:27:34 +02:00
// generate the search result lines; the content will be produced by another servlet
2009-01-07 00:16:10 +01:00
for ( int i = 0 ; i < theQuery . displayResults ( ) ; i + + ) {
2007-09-06 15:26:38 +02:00
prop . put ( " results_ " + i + " _item " , offset + i ) ;
2007-12-12 19:57:43 +01:00
prop . put ( " results_ " + i + " _eventID " , theQuery . id ( false ) ) ;
2007-09-06 15:26:38 +02:00
}
prop . put ( " results " , theQuery . displayResults ( ) ) ;
2010-01-21 23:03:02 +01:00
prop . put ( " resultTable " , ( contentdom = = ContentDomain . APP | | contentdom = = ContentDomain . AUDIO | | contentdom = = ContentDomain . VIDEO ) ? 1 : 0 ) ;
2007-12-12 19:57:43 +01:00
prop . put ( " eventID " , theQuery . id ( false ) ) ; // for bottomline
2011-06-13 23:44:03 +02:00
2007-08-28 14:15:46 +02:00
// process result of search
2009-12-02 01:37:59 +01:00
if ( ! filtered . isEmpty ( ) ) {
2007-10-24 23:38:19 +02:00
prop . put ( " excluded " , " 1 " ) ;
prop . putHTML ( " excluded_stopwords " , filtered . toString ( ) ) ;
2007-08-28 14:15:46 +02:00
} else {
2007-10-24 23:38:19 +02:00
prop . put ( " excluded " , " 0 " ) ;
2007-01-18 11:42:36 +01:00
}
2006-09-19 12:44:45 +02:00
2009-12-02 01:37:59 +01:00
if ( prop = = null | | prop . isEmpty ( ) ) {
2011-06-13 23:44:03 +02:00
if ( post . get ( " query " , post . get ( " search " , " " ) ) . length ( ) < 2 ) {
prop . put ( " num-results " , " 2 " ) ; // no results - at least 2 chars
2006-09-19 12:44:45 +02:00
} else {
2007-10-24 23:38:19 +02:00
prop . put ( " num-results " , " 1 " ) ; // no results
2006-09-19 12:44:45 +02:00
}
} else {
2007-10-24 23:38:19 +02:00
prop . put ( " num-results " , " 3 " ) ;
2006-09-19 12:44:45 +02:00
}
2009-04-15 13:22:43 +02:00
prop . put ( " cat " , " href " ) ;
prop . put ( " depth " , " 0 " ) ;
2006-09-19 12:44:45 +02:00
// adding some additional properties needed for the rss feed
2009-07-11 19:03:22 +02:00
String hostName = header . get ( " Host " , " localhost " ) ;
2011-04-02 01:32:40 +02:00
if ( hostName . indexOf ( ':' ) = = - 1 ) {
hostName + = " : " + serverCore . getPortNr ( env . getConfig ( " port " , " 8090 " ) ) ;
}
2007-10-24 23:38:19 +02:00
prop . put ( " searchBaseURL " , " http:// " + hostName + " /yacysearch.html " ) ;
prop . put ( " rssYacyImageURL " , " http:// " + hostName + " /env/grafics/yacy.gif " ) ;
2006-09-19 12:44:45 +02:00
}
2011-06-13 23:44:03 +02:00
2007-10-24 23:38:19 +02:00
prop . put ( " searchagain " , global ? " 1 " : " 0 " ) ;
2009-04-28 19:12:31 +02:00
prop . putHTML ( " former " , originalquerystring ) ;
2009-04-15 13:22:43 +02:00
prop . put ( " count " , itemsPerPage ) ;
prop . put ( " offset " , offset ) ;
prop . put ( " resource " , global ? " global " : " local " ) ;
prop . putHTML ( " urlmaskfilter " , originalUrlMask ) ;
prop . putHTML ( " prefermaskfilter " , prefermask ) ;
prop . put ( " indexof " , ( indexof ) ? " on " : " off " ) ;
prop . put ( " constraint " , ( constraint = = null ) ? " " : constraint . exportB64 ( ) ) ;
2011-03-21 08:50:34 +01:00
prop . put ( " search.verify " , snippetFetchStrategy = = null ? sb . getConfig ( " search.verify " , " iffresh " ) : snippetFetchStrategy . toName ( ) ) ;
prop . put ( " search.navigation " , ( post = = null ) ? sb . getConfig ( " search.navigation " , " all " ) : post . get ( " nav " , " all " ) ) ;
2009-04-15 13:22:43 +02:00
prop . put ( " contentdom " , ( post = = null ? " text " : post . get ( " contentdom " , " text " ) ) ) ;
2010-04-30 23:53:20 +02:00
prop . put ( " searchdomswitches " , sb . getConfigBool ( " search.text " , true ) | | sb . getConfigBool ( " search.audio " , true ) | | sb . getConfigBool ( " search.video " , true ) | | sb . getConfigBool ( " search.image " , true ) | | sb . getConfigBool ( " search.app " , true ) ? 1 : 0 ) ;
prop . put ( " searchdomswitches_searchtext " , sb . getConfigBool ( " search.text " , true ) ? 1 : 0 ) ;
prop . put ( " searchdomswitches_searchaudio " , sb . getConfigBool ( " search.audio " , true ) ? 1 : 0 ) ;
prop . put ( " searchdomswitches_searchvideo " , sb . getConfigBool ( " search.video " , true ) ? 1 : 0 ) ;
prop . put ( " searchdomswitches_searchimage " , sb . getConfigBool ( " search.image " , true ) ? 1 : 0 ) ;
prop . put ( " searchdomswitches_searchapp " , sb . getConfigBool ( " search.app " , true ) ? 1 : 0 ) ;
2010-05-12 01:06:39 +02:00
prop . put ( " searchdomswitches_searchtext_check " , ( contentdom = = ContentDomain . TEXT ) ? " 1 " : " 0 " ) ;
prop . put ( " searchdomswitches_searchaudio_check " , ( contentdom = = ContentDomain . AUDIO ) ? " 1 " : " 0 " ) ;
prop . put ( " searchdomswitches_searchvideo_check " , ( contentdom = = ContentDomain . VIDEO ) ? " 1 " : " 0 " ) ;
prop . put ( " searchdomswitches_searchimage_check " , ( contentdom = = ContentDomain . IMAGE ) ? " 1 " : " 0 " ) ;
prop . put ( " searchdomswitches_searchapp_check " , ( contentdom = = ContentDomain . APP ) ? " 1 " : " 0 " ) ;
2010-05-12 02:48:24 +02:00
// copy properties for "more options" link
prop . put ( " searchdomswitches_count " , prop . get ( " count " ) ) ;
prop . put ( " searchdomswitches_urlmaskfilter " , prop . get ( " urlmaskfilter " ) ) ;
prop . put ( " searchdomswitches_prefermaskfilter " , prop . get ( " prefermaskfilter " ) ) ;
prop . put ( " searchdomswitches_cat " , prop . get ( " cat " ) ) ;
prop . put ( " searchdomswitches_constraint " , prop . get ( " constraint " ) ) ;
prop . put ( " searchdomswitches_contentdom " , prop . get ( " contentdom " ) ) ;
prop . put ( " searchdomswitches_former " , prop . get ( " former " ) ) ;
prop . put ( " searchdomswitches_meanCount " , prop . get ( " meanCount " ) ) ;
2007-10-04 11:21:03 +02:00
// for RSS: don't HTML encode some elements
2009-04-28 19:12:31 +02:00
prop . putXML ( " rss_query " , originalquerystring ) ;
2010-09-08 13:54:25 +02:00
prop . putXML ( " rss_queryenc " , originalquerystring . replace ( ' ' , '+' ) ) ;
2011-06-13 23:44:03 +02:00
2007-12-06 22:53:17 +01:00
sb . localSearchLastAccess = System . currentTimeMillis ( ) ;
2011-06-13 23:44:03 +02:00
2006-09-19 12:44:45 +02:00
// return rewrite properties
return prop ;
}
}