2006-09-19 12:44:45 +02:00
// yacysearch.java
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
//
// You must compile this file with
// javac -classpath .:../classes yacysearch.java
// if the shell's current path is HTROOT
import java.io.IOException ;
2007-01-18 11:42:36 +01:00
import java.io.UnsupportedEncodingException ;
2006-09-19 12:44:45 +02:00
import java.net.MalformedURLException ;
2007-01-18 11:42:36 +01:00
import java.net.URLEncoder ;
2006-09-19 12:44:45 +02:00
import java.util.HashMap ;
import java.util.Iterator ;
2007-03-29 00:18:54 +02:00
import java.util.regex.PatternSyntaxException ;
2006-09-19 12:44:45 +02:00
import java.util.TreeSet ;
import de.anomic.htmlFilter.htmlFilterImageEntry ;
import de.anomic.http.httpHeader ;
2006-11-08 17:17:47 +01:00
import de.anomic.index.indexURLEntry ;
2006-11-23 03:16:30 +01:00
import de.anomic.kelondro.kelondroBitfield ;
2006-09-19 12:44:45 +02:00
import de.anomic.kelondro.kelondroMSetTools ;
import de.anomic.kelondro.kelondroNaturalOrder ;
2006-09-20 14:25:07 +02:00
import de.anomic.net.URL ;
2006-11-23 03:16:30 +01:00
import de.anomic.plasma.plasmaCondenser ;
2006-09-19 12:44:45 +02:00
import de.anomic.plasma.plasmaParserDocument ;
import de.anomic.plasma.plasmaSearchImages ;
import de.anomic.plasma.plasmaSearchPreOrder ;
import de.anomic.plasma.plasmaSearchQuery ;
import de.anomic.plasma.plasmaSearchRankingProfile ;
import de.anomic.plasma.plasmaSearchTimingProfile ;
import de.anomic.plasma.plasmaSwitchboard ;
2007-01-18 11:42:36 +01:00
import de.anomic.plasma.plasmaURL ;
2007-01-19 01:38:03 +01:00
import de.anomic.plasma.plasmaSearchResults ;
2006-09-19 12:44:45 +02:00
import de.anomic.server.serverCore ;
import de.anomic.server.serverDate ;
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
2006-12-21 04:09:46 +01:00
import de.anomic.tools.crypt ;
2007-01-18 11:42:36 +01:00
import de.anomic.tools.nxTools ;
2006-09-19 12:44:45 +02:00
import de.anomic.yacy.yacyCore ;
2007-01-18 11:42:36 +01:00
import de.anomic.yacy.yacyNewsPool ;
2006-09-19 12:44:45 +02:00
import de.anomic.yacy.yacyNewsRecord ;
2007-01-18 11:42:36 +01:00
import de.anomic.yacy.yacySeed ;
2006-09-19 12:44:45 +02:00
public class yacysearch {
public static final int MAX_TOPWORDS = 24 ;
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
final plasmaSwitchboard sb = ( plasmaSwitchboard ) env ;
boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
2007-04-11 16:46:54 +02:00
int display = ( post = = null ) ? 0 : post . getInt ( " display " , 0 ) ;
if ( ( display = = 1 ) & & ( ! authenticated ) ) display = 0 ;
2007-04-12 09:31:26 +02:00
int input = ( post = = null ) ? 2 : post . getInt ( " input " , 2 ) ;
2006-09-19 12:44:45 +02:00
String promoteSearchPageGreeting = env . getConfig ( " promoteSearchPageGreeting " , " " ) ;
if ( promoteSearchPageGreeting . length ( ) = = 0 ) promoteSearchPageGreeting = " P2P WEB SEARCH " ;
// case if no values are requested
final String referer = ( String ) header . get ( " Referer " ) ;
2006-12-23 02:54:28 +01:00
String querystring = ( post = = null ) ? " " : post . get ( " search " , " " ) . trim ( ) ;
2007-02-02 22:20:53 +01:00
2006-12-23 02:54:28 +01:00
if ( ( post = = null ) | | ( env = = null ) | | ( querystring . length ( ) = = 0 ) ) {
2006-09-19 12:44:45 +02:00
// save referrer
// System.out.println("HEADER=" + header.toString());
if ( referer ! = null ) {
URL url ;
try { url = new URL ( referer ) ; } catch ( MalformedURLException e ) { url = null ; }
if ( ( url ! = null ) & & ( serverCore . isNotLocal ( url ) ) ) {
final HashMap referrerprop = new HashMap ( ) ;
referrerprop . put ( " count " , " 1 " ) ;
referrerprop . put ( " clientip " , header . get ( " CLIENTIP " ) ) ;
referrerprop . put ( " useragent " , header . get ( " User-Agent " ) ) ;
referrerprop . put ( " date " , ( new serverDate ( ) ) . toShortString ( false ) ) ;
if ( sb . facilityDB ! = null ) try { sb . facilityDB . update ( " backlinks " , referer , referrerprop ) ; } catch ( IOException e ) { }
}
}
// we create empty entries for template strings
final serverObjects prop = new serverObjects ( ) ;
2007-04-12 09:31:26 +02:00
prop . put ( " searchagain " , 0 ) ;
2007-04-11 16:46:54 +02:00
prop . put ( " input " , input ) ;
2006-09-19 12:44:45 +02:00
prop . put ( " display " , display ) ;
2007-04-11 16:46:54 +02:00
prop . put ( " input_input " , input ) ;
prop . put ( " input_display " , display ) ;
2007-04-10 16:28:04 +02:00
prop . putASIS ( " input_promoteSearchPageGreeting " , promoteSearchPageGreeting ) ;
prop . put ( " input_former " , " " ) ;
2007-04-26 16:28:57 +02:00
prop . put ( " former " , " " ) ;
2007-04-10 16:28:04 +02:00
prop . put ( " input_count " , 10 ) ;
prop . put ( " input_resource " , " global " ) ;
prop . put ( " input_time " , 6 ) ;
prop . put ( " input_urlmaskfilter " , " .* " ) ;
prop . put ( " input_prefermaskfilter " , " " ) ;
prop . put ( " input_indexof " , " off " ) ;
prop . put ( " input_constraint " , plasmaSearchQuery . catchall_constraint . exportB64 ( ) ) ;
prop . put ( " input_cat " , " href " ) ;
prop . put ( " input_depth " , " 0 " ) ;
prop . put ( " input_contentdom " , " text " ) ;
prop . put ( " input_contentdomCheckText " , 1 ) ;
prop . put ( " input_contentdomCheckAudio " , 0 ) ;
prop . put ( " input_contentdomCheckVideo " , 0 ) ;
prop . put ( " input_contentdomCheckImage " , 0 ) ;
prop . put ( " input_contentdomCheckApp " , 0 ) ;
2007-04-12 10:58:19 +02:00
prop . put ( " type " , 0 ) ;
prop . put ( " type_excluded " , 0 ) ;
prop . put ( " type_num-results " , 0 ) ;
prop . put ( " type_combine " , 0 ) ;
prop . put ( " type_resultbottomline " , 0 ) ;
prop . put ( " type_results " , " " ) ;
2006-09-19 12:44:45 +02:00
return prop ;
}
2006-12-01 03:45:49 +01:00
// collect search attributes
2006-09-19 12:44:45 +02:00
int maxDistance = Integer . MAX_VALUE ;
2007-02-02 22:20:53 +01:00
2006-09-19 12:44:45 +02:00
if ( ( querystring . length ( ) > 2 ) & & ( querystring . charAt ( 0 ) = = '"' ) & & ( querystring . charAt ( querystring . length ( ) - 1 ) = = '"' ) ) {
querystring = querystring . substring ( 1 , querystring . length ( ) - 1 ) . trim ( ) ;
maxDistance = 1 ;
}
if ( sb . facilityDB ! = null ) try { sb . facilityDB . update ( " zeitgeist " , querystring , post ) ; } catch ( Exception e ) { }
2006-12-12 03:09:25 +01:00
int count = Integer . parseInt ( post . get ( " count " , " 10 " ) ) ;
2006-09-19 12:44:45 +02:00
boolean global = ( post = = null ) ? true : post . get ( " resource " , " global " ) . equals ( " global " ) ;
2006-11-23 03:16:30 +01:00
final boolean indexof = post . get ( " indexof " , " " ) . equals ( " on " ) ;
2006-09-19 12:44:45 +02:00
final long searchtime = 1000 * Long . parseLong ( post . get ( " time " , " 10 " ) ) ;
String urlmask = " " ;
if ( post . containsKey ( " urlmask " ) & & post . get ( " urlmask " ) . equals ( " no " ) ) {
urlmask = " .* " ;
} else {
urlmask = ( post . containsKey ( " urlmaskfilter " ) ) ? ( String ) post . get ( " urlmaskfilter " ) : " .* " ;
}
String prefermask = post . get ( " prefermaskfilter " , " " ) ;
if ( ( prefermask . length ( ) > 0 ) & & ( prefermask . indexOf ( " .* " ) < 0 ) ) prefermask = " .* " + prefermask + " .* " ;
2006-11-23 03:16:30 +01:00
kelondroBitfield constraint = post . containsKey ( " constraint " ) ? new kelondroBitfield ( 4 , post . get ( " constraint " , " ______ " ) ) : plasmaSearchQuery . catchall_constraint ;
if ( indexof ) {
constraint = new kelondroBitfield ( ) ;
constraint . set ( plasmaCondenser . flag_cat_indexof , true ) ;
}
2006-12-01 03:45:49 +01:00
// SEARCH
2007-04-23 22:47:07 +02:00
final boolean indexDistributeGranted = sb . getConfig ( plasmaSwitchboard . INDEX_DIST_ALLOW , " true " ) . equals ( " true " ) ;
2006-12-01 03:45:49 +01:00
final boolean indexReceiveGranted = sb . getConfig ( " allowReceiveIndex " , " true " ) . equals ( " true " ) ;
2007-04-03 14:10:12 +02:00
final boolean offline = yacyCore . seedDB . mySeed . isVirgin ( ) ;
2007-04-26 11:51:51 +02:00
final boolean clustersearch = sb . isRobinsonMode ( ) & &
2007-04-28 02:12:41 +02:00
( sb . getConfig ( " cluster.mode " , " " ) . equals ( " privatecluster " ) | |
sb . getConfig ( " cluster.mode " , " " ) . equals ( " publiccluster " ) ) ;
2007-04-03 14:10:12 +02:00
if ( offline | | ! indexDistributeGranted | | ! indexReceiveGranted ) { global = false ; }
2007-04-26 11:51:51 +02:00
if ( clustersearch ) global = true ; // switches search on, but search target is limited to cluster nodes
2006-12-01 03:45:49 +01:00
2006-12-07 03:40:57 +01:00
// find search domain
2006-12-20 16:44:29 +01:00
int contentdomCode = plasmaSearchQuery . CONTENTDOM_TEXT ;
String contentdomString = post . get ( " contentdom " , " text " ) ;
if ( contentdomString . equals ( " text " ) ) contentdomCode = plasmaSearchQuery . CONTENTDOM_TEXT ;
if ( contentdomString . equals ( " audio " ) ) contentdomCode = plasmaSearchQuery . CONTENTDOM_AUDIO ;
if ( contentdomString . equals ( " video " ) ) contentdomCode = plasmaSearchQuery . CONTENTDOM_VIDEO ;
if ( contentdomString . equals ( " image " ) ) contentdomCode = plasmaSearchQuery . CONTENTDOM_IMAGE ;
if ( contentdomString . equals ( " app " ) ) contentdomCode = plasmaSearchQuery . CONTENTDOM_APP ;
2006-12-07 03:40:57 +01:00
2006-12-12 03:09:25 +01:00
// patch until better search profiles are available
2006-12-20 16:44:29 +01:00
if ( ( contentdomCode ! = plasmaSearchQuery . CONTENTDOM_TEXT ) & & ( count < = 10 ) ) count = 30 ;
2006-12-12 03:09:25 +01:00
2006-09-19 12:44:45 +02:00
serverObjects prop = new serverObjects ( ) ;
if ( post . get ( " cat " , " href " ) . equals ( " href " ) ) {
2007-04-03 17:35:29 +02:00
final TreeSet [ ] query = plasmaSearchQuery . cleanQuery ( querystring ) ;
2006-12-20 16:44:29 +01:00
// filter out stopwords
2007-04-03 17:35:29 +02:00
final TreeSet filtered = kelondroMSetTools . joinConstructive ( query [ 0 ] , plasmaSwitchboard . stopwords ) ;
2006-12-20 16:44:29 +01:00
if ( filtered . size ( ) > 0 ) {
2007-04-03 17:35:29 +02:00
kelondroMSetTools . excludeDestructive ( query [ 0 ] , plasmaSwitchboard . stopwords ) ;
2006-12-20 16:44:29 +01:00
}
2006-09-19 12:44:45 +02:00
2006-12-20 16:44:29 +01:00
// if a minus-button was hit, remove a special reference first
if ( post . containsKey ( " deleteref " ) ) {
if ( ! sb . verifyAuthentication ( header , true ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
2006-09-19 12:44:45 +02:00
}
2006-12-20 16:44:29 +01:00
// delete the index entry locally
final String delHash = post . get ( " deleteref " , " " ) ; // urlhash
2007-04-03 17:35:29 +02:00
sb . wordIndex . removeWordReferences ( query [ 0 ] , delHash ) ;
2006-12-20 16:44:29 +01:00
// make new news message with negative voting
HashMap map = new HashMap ( ) ;
map . put ( " urlhash " , delHash ) ;
map . put ( " vote " , " negative " ) ;
map . put ( " refid " , " " ) ;
2007-03-21 12:09:15 +01:00
yacyCore . newsPool . publishMyNews ( new yacyNewsRecord ( yacyNewsPool . CATEGORY_SURFTIPP_VOTE_ADD , map ) ) ;
2006-12-20 16:44:29 +01:00
}
2006-09-19 12:44:45 +02:00
2006-12-20 16:44:29 +01:00
// if aplus-button was hit, create new voting message
if ( post . containsKey ( " recommendref " ) ) {
if ( ! sb . verifyAuthentication ( header , true ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
final String recommendHash = post . get ( " recommendref " , " " ) ; // urlhash
indexURLEntry urlentry = sb . wordIndex . loadedURL . load ( recommendHash , null ) ;
if ( urlentry ! = null ) {
indexURLEntry . Components comp = urlentry . comp ( ) ;
plasmaParserDocument document ;
document = sb . snippetCache . retrieveDocument ( comp . url ( ) , true , 5000 , true ) ;
if ( document ! = null ) {
// create a news message
HashMap map = new HashMap ( ) ;
map . put ( " url " , comp . url ( ) . toNormalform ( ) . replace ( ',' , '|' ) ) ;
2007-03-18 13:33:19 +01:00
map . put ( " title " , comp . title ( ) . replace ( ',' , ' ' ) ) ;
map . put ( " description " , ( ( document = = null ) ? comp . title ( ) : document . getTitle ( ) ) . replace ( ',' , ' ' ) ) ;
2007-03-17 02:18:34 +01:00
map . put ( " author " , ( ( document = = null ) ? " " : document . getAuthor ( ) ) ) ;
map . put ( " tags " , ( ( document = = null ) ? " " : document . getKeywords ( ' ' ) ) ) ;
2007-03-21 12:09:15 +01:00
yacyCore . newsPool . publishMyNews ( new yacyNewsRecord ( yacyNewsPool . CATEGORY_SURFTIPP_ADD , map ) ) ;
2006-12-20 16:44:29 +01:00
document . close ( ) ;
2006-09-19 12:44:45 +02:00
}
}
2006-12-20 16:44:29 +01:00
}
2006-09-19 12:44:45 +02:00
2006-12-20 16:44:29 +01:00
// prepare search properties
final boolean yacyonline = ( ( yacyCore . seedDB ! = null ) & & ( yacyCore . seedDB . mySeed ! = null ) & & ( yacyCore . seedDB . mySeed . getAddress ( ) ! = null ) ) ;
final boolean samesearch = env . getConfig ( " last-search " , " " ) . equals ( querystring + contentdomString ) ;
final boolean globalsearch = ( global ) & & ( yacyonline ) & & ( ! samesearch ) ;
// do the search
plasmaSearchQuery thisSearch = new plasmaSearchQuery (
2007-04-05 12:14:48 +02:00
querystring ,
plasmaCondenser . words2hashes ( query [ 0 ] ) ,
plasmaCondenser . words2hashes ( query [ 1 ] ) ,
2006-09-19 12:44:45 +02:00
maxDistance ,
prefermask ,
2006-12-20 16:44:29 +01:00
contentdomCode ,
2006-09-19 12:44:45 +02:00
count ,
searchtime ,
urlmask ,
2007-04-26 11:51:51 +02:00
( clustersearch & & globalsearch ) ? plasmaSearchQuery . SEARCHDOM_CLUSTERALL :
( ( globalsearch ) ? plasmaSearchQuery . SEARCHDOM_GLOBALDHT : plasmaSearchQuery . SEARCHDOM_LOCAL ) ,
2006-12-20 16:44:29 +01:00
" " ,
20 ,
constraint ) ;
2006-12-21 04:09:46 +01:00
plasmaSearchRankingProfile ranking = ( sb . getConfig ( " rankingProfile " , " " ) . length ( ) = = 0 ) ? new plasmaSearchRankingProfile ( contentdomString ) : new plasmaSearchRankingProfile ( " " , crypt . simpleDecode ( sb . getConfig ( " rankingProfile " , " " ) , null ) ) ;
2006-12-20 16:44:29 +01:00
plasmaSearchTimingProfile localTiming = new plasmaSearchTimingProfile ( 4 * thisSearch . maximumTime / 10 , thisSearch . wantedResults ) ;
plasmaSearchTimingProfile remoteTiming = new plasmaSearchTimingProfile ( 6 * thisSearch . maximumTime / 10 , thisSearch . wantedResults ) ;
2007-03-29 00:18:54 +02:00
plasmaSearchResults results = new plasmaSearchResults ( ) ;
String wrongregex = null ;
try {
results = sb . searchFromLocal ( thisSearch , ranking , localTiming , remoteTiming , true , ( String ) header . get ( " CLIENTIP " ) ) ;
}
catch ( PatternSyntaxException e ) {
wrongregex = e . getPattern ( ) ;
}
2007-01-18 11:42:36 +01:00
//prop=sb.searchFromLocal(thisSearch, ranking, localTiming, remoteTiming, true, (String) header.get("CLIENTIP"));
prop = new serverObjects ( ) ;
//prop.put("references", 0);
URL wordURL = null ;
prop . put ( " num-results_totalcount " , results . getTotalcount ( ) ) ;
prop . put ( " num-results_filteredcount " , results . getFilteredcount ( ) ) ;
prop . put ( " num-results_orderedcount " , results . getOrderedcount ( ) ) ;
prop . put ( " num-results_linkcount " , results . getLinkcount ( ) ) ;
prop . put ( " type_results " , 0 ) ;
if ( results . numResults ( ) ! = 0 ) {
//we've got results
prop . put ( " num-results_totalcount " , results . getTotalcount ( ) ) ;
prop . put ( " num-results_filteredcount " , results . getFilteredcount ( ) ) ;
prop . put ( " num-results_orderedcount " , Integer . toString ( results . getOrderedcount ( ) ) ) ; //why toString?
prop . put ( " num-results_globalresults " , results . getGlobalresults ( ) ) ;
for ( int i = 0 ; i < results . numResults ( ) ; i + + ) {
2007-01-19 01:38:03 +01:00
plasmaSearchResults . searchResult result = results . getResult ( i ) ;
2007-01-18 11:42:36 +01:00
try {
2007-03-21 12:09:15 +01:00
prop . put ( " type_results_ " + i + " _authorized_recommend " , ( yacyCore . newsPool . getSpecific ( yacyNewsPool . OUTGOING_DB , yacyNewsPool . CATEGORY_SURFTIPP_ADD , " url " , result . getUrl ( ) ) = = null ) ? 1 : 0 ) ;
2007-01-18 11:42:36 +01:00
} catch ( IOException e ) { }
2007-01-19 10:35:08 +01:00
//prop.put("type_results_" + i + "_authorized_recommend_deletelink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&deleteref=" + result.getUrlhash() + "&urlmaskfilter=.*");
//prop.put("type_results_" + i + "_authorized_recommend_recommendlink", "/yacysearch.html?search=" + results.getFormerSearch() + "&Enter=Search&count=" + results.getQuery().wantedResults + "&order=" + crypt.simpleEncode(results.getRanking().toExternalString()) + "&resource=local&time=3&recommendref=" + result.getUrlhash() + "&urlmaskfilter=.*");
prop . put ( " type_results_ " + i + " _authorized_recommend_deletelink " , " /yacysearch.html?search= " + results . getFormerSearch ( ) + " &Enter=Search&count= " + results . getQuery ( ) . wantedResults + " &order= " + crypt . simpleEncode ( results . getRanking ( ) . toExternalString ( ) ) + " &resource=local&time=3&deleteref= " + result . getUrlhash ( ) + " &urlmaskfilter=.* " ) ;
prop . put ( " type_results_ " + i + " _authorized_recommend_recommendlink " , " /yacysearch.html?search= " + results . getFormerSearch ( ) + " &Enter=Search&count= " + results . getQuery ( ) . wantedResults + " &order= " + crypt . simpleEncode ( results . getRanking ( ) . toExternalString ( ) ) + " &resource=local&time=3&recommendref= " + result . getUrlhash ( ) + " &urlmaskfilter=.* " ) ;
2007-01-18 11:42:36 +01:00
prop . put ( " type_results_ " + i + " _authorized_urlhash " , result . getUrlhash ( ) ) ;
2007-03-18 13:33:19 +01:00
prop . put ( " type_results_ " + i + " _description " , result . getUrlentry ( ) . comp ( ) . title ( ) ) ;
2007-01-18 11:42:36 +01:00
prop . put ( " type_results_ " + i + " _url " , result . getUrl ( ) ) ;
prop . put ( " type_results_ " + i + " _urlhash " , result . getUrlhash ( ) ) ;
prop . put ( " type_results_ " + i + " _urlhexhash " , yacySeed . b64Hash2hexHash ( result . getUrlhash ( ) ) ) ;
prop . put ( " type_results_ " + i + " _urlname " , nxTools . shortenURLString ( result . getUrlname ( ) , 120 ) ) ;
prop . put ( " type_results_ " + i + " _date " , plasmaSwitchboard . dateString ( result . getUrlentry ( ) . moddate ( ) ) ) ;
prop . put ( " type_results_ " + i + " _ybr " , plasmaSearchPreOrder . ybr ( result . getUrlentry ( ) . hash ( ) ) ) ;
prop . put ( " type_results_ " + i + " _size " , Long . toString ( result . getUrlentry ( ) . size ( ) ) ) ;
try {
2007-04-11 16:46:54 +02:00
prop . put ( " type_results_ " + i + " _words " , URLEncoder . encode ( query [ 0 ] . toString ( ) , " UTF-8 " ) ) ;
2007-01-18 11:42:36 +01:00
} catch ( UnsupportedEncodingException e ) { }
prop . put ( " type_results_ " + i + " _former " , results . getFormerSearch ( ) ) ;
prop . put ( " type_results_ " + i + " _rankingprops " , result . getUrlentry ( ) . word ( ) . toPropertyForm ( ) + " , domLengthEstimated= " + plasmaURL . domLengthEstimation ( result . getUrlhash ( ) ) +
( ( plasmaURL . probablyRootURL ( result . getUrlhash ( ) ) ) ? " , probablyRootURL " : " " ) +
2007-04-05 12:14:48 +02:00
( ( ( wordURL = plasmaURL . probablyWordURL ( result . getUrlhash ( ) , query [ 0 ] ) ) ! = null ) ? " , probablyWordURL= " + wordURL . toNormalform ( ) : " " ) ) ;
2007-01-18 11:42:36 +01:00
// adding snippet if available
if ( result . hasSnippet ( ) ) {
prop . put ( " type_results_ " + i + " _snippet " , 1 ) ;
prop . putASIS ( " type_results_ " + i + " _snippet_text " , result . getSnippet ( ) . getLineMarked ( results . getQuery ( ) . queryHashes ) ) ; //FIXME: the ASIS should not be needed, if there is no html in .java
} else {
prop . put ( " type_results_ " + i + " _snippet " , 0 ) ;
prop . put ( " type_results_ " + i + " _snippet_text " , " " ) ;
}
prop . put ( " type_results " , results . numResults ( ) ) ;
prop . put ( " references " , results . getReferences ( ) ) ;
prop . put ( " num-results_linkcount " , Integer . toString ( results . numResults ( ) ) ) ;
}
}
2006-12-20 16:44:29 +01:00
// remember the last search expression
env . setConfig ( " last-search " , querystring + contentdomString ) ;
// process result of search
prop . put ( " type_resultbottomline " , 0 ) ;
if ( filtered . size ( ) > 0 ) {
prop . put ( " excluded " , 1 ) ;
prop . put ( " excluded_stopwords " , filtered . toString ( ) ) ;
} else {
prop . put ( " excluded " , 0 ) ;
}
2006-09-19 12:44:45 +02:00
if ( prop = = null | | prop . size ( ) = = 0 ) {
if ( post . get ( " search " , " " ) . length ( ) < 3 ) {
2006-12-13 02:39:34 +01:00
prop . put ( " num-results " , 2 ) ; // no results - at least 3 chars
2006-09-19 12:44:45 +02:00
} else {
2006-12-13 02:39:34 +01:00
prop . put ( " num-results " , 1 ) ; // no results
2006-09-19 12:44:45 +02:00
}
} else {
2006-12-14 03:48:37 +01:00
final int totalcount = prop . getInt ( " num-results_totalcount " , 0 ) ;
2006-12-13 02:39:34 +01:00
if ( totalcount > = 10 ) {
final Object [ ] references = ( Object [ ] ) prop . get ( " references " , new String [ 0 ] ) ;
2007-03-29 00:37:01 +02:00
prop . put ( " num-results " , 5 ) ;
2006-09-19 12:44:45 +02:00
int hintcount = references . length ;
if ( hintcount > 0 ) {
prop . put ( " type_combine " , 1 ) ;
// get the topwords
final TreeSet topwords = new TreeSet ( kelondroNaturalOrder . naturalOrder ) ;
String tmp = " " ;
for ( int i = 0 ; i < hintcount ; i + + ) {
tmp = ( String ) references [ i ] ;
if ( tmp . matches ( " [a-z]+ " ) ) {
topwords . add ( tmp ) ;
// } else {
// topwords.add("(" + tmp + ")");
}
}
// filter out the badwords
final TreeSet filteredtopwords = kelondroMSetTools . joinConstructive ( topwords , plasmaSwitchboard . badwords ) ;
if ( filteredtopwords . size ( ) > 0 ) {
kelondroMSetTools . excludeDestructive ( topwords , plasmaSwitchboard . badwords ) ;
}
2007-03-08 21:50:27 +01:00
//avoid stopwords being topwords
if ( env . getConfig ( " filterOutStopwordsFromTopwords " , " true " ) . equals ( " true " ) ) {
if ( ( plasmaSwitchboard . stopwords ! = null ) & & ( plasmaSwitchboard . stopwords . size ( ) > 0 ) ) {
kelondroMSetTools . excludeDestructive ( topwords , plasmaSwitchboard . stopwords ) ;
}
}
2006-09-19 12:44:45 +02:00
String word ;
hintcount = 0 ;
final Iterator iter = topwords . iterator ( ) ;
while ( iter . hasNext ( ) ) {
word = ( String ) iter . next ( ) ;
if ( word ! = null ) {
prop . put ( " type_combine_words_ " + hintcount + " _word " , word ) ;
prop . put ( " type_combine_words_ " + hintcount + " _newsearch " , post . get ( " search " , " " ) . replace ( ' ' , '+' ) + " + " + word ) ;
prop . put ( " type_combine_words_ " + hintcount + " _count " , count ) ;
prop . put ( " type_combine_words_ " + hintcount + " _resource " , ( ( global ) ? " global " : " local " ) ) ;
prop . put ( " type_combine_words_ " + hintcount + " _time " , ( searchtime / 1000 ) ) ;
}
prop . put ( " type_combine_words " , hintcount ) ;
if ( hintcount + + > MAX_TOPWORDS ) {
break ;
}
}
}
} else {
2007-03-29 00:18:54 +02:00
if ( wrongregex ! = null ) {
prop . put ( " num-results_wrong_regex " , wrongregex ) ;
2006-12-13 02:39:34 +01:00
prop . put ( " num-results " , 4 ) ;
2006-09-19 12:44:45 +02:00
}
2007-03-29 00:18:54 +02:00
else if ( totalcount = = 0 ) {
prop . put ( " num-results " , 3 ) ; // long
}
else {
prop . put ( " num-results " , 5 ) ;
}
2006-09-19 12:44:45 +02:00
}
}
2007-03-29 00:18:54 +02:00
if ( wrongregex ! = null ) {
prop . put ( " type_resultbottomline " , 0 ) ;
}
else if ( yacyonline ) {
2006-09-19 12:44:45 +02:00
if ( global ) {
prop . put ( " type_resultbottomline " , 1 ) ;
2007-01-14 20:09:54 +01:00
prop . put ( " type_resultbottomline_globalresults " , prop . get ( " num-results_globalresults " , " 0 " ) ) ;
2006-09-19 12:44:45 +02:00
} else {
2007-04-10 16:37:34 +02:00
prop . put ( " type_resultbottomline " , 0 ) ;
2006-09-19 12:44:45 +02:00
}
} else {
if ( global ) {
prop . put ( " type_resultbottomline " , 3 ) ;
} else {
2007-04-10 16:37:34 +02:00
prop . put ( " type_resultbottomline " , 0 ) ;
2006-09-19 12:44:45 +02:00
}
}
2007-04-12 10:58:19 +02:00
prop . put ( " type " , ( thisSearch . contentdom = = plasmaSearchQuery . CONTENTDOM_TEXT ) ? 0 : ( ( thisSearch . contentdom = = plasmaSearchQuery . CONTENTDOM_IMAGE ) ? 2 : 1 ) ) ;
2006-12-20 16:44:29 +01:00
if ( prop . getInt ( " type " , 0 ) = = 1 ) prop . put ( " type_mediatype " , contentdomString ) ;
2007-04-11 16:46:54 +02:00
prop . put ( " input_cat " , " href " ) ;
prop . put ( " input_depth " , " 0 " ) ;
2006-09-19 12:44:45 +02:00
// adding some additional properties needed for the rss feed
String hostName = ( String ) header . get ( " Host " , " localhost " ) ;
if ( hostName . indexOf ( " : " ) = = - 1 ) hostName + = " : " + serverCore . getPortNr ( env . getConfig ( " port " , " 8080 " ) ) ;
prop . put ( " rssYacyImageURL " , " http:// " + hostName + " /env/grafics/yacy.gif " ) ;
}
if ( post . get ( " cat " , " href " ) . equals ( " image " ) ) {
int depth = post . getInt ( " depth " , 0 ) ;
int columns = post . getInt ( " columns " , 6 ) ;
URL url = null ;
try { url = new URL ( post . get ( " url " , " " ) ) ; } catch ( MalformedURLException e ) { }
plasmaSearchImages si = new plasmaSearchImages ( sb . snippetCache , 6000 , url , depth ) ;
Iterator i = si . entries ( ) ;
htmlFilterImageEntry ie ;
int line = 0 ;
while ( i . hasNext ( ) ) {
int col = 0 ;
for ( col = 0 ; col < columns ; col + + ) {
if ( ! i . hasNext ( ) ) break ;
ie = ( htmlFilterImageEntry ) i . next ( ) ;
String urls = ie . url ( ) . toString ( ) ;
String name = " " ;
int p = urls . lastIndexOf ( '/' ) ;
if ( p > 0 ) name = urls . substring ( p + 1 ) ;
prop . put ( " type_results_ " + line + " _line_ " + col + " _url " , urls ) ;
prop . put ( " type_results_ " + line + " _line_ " + col + " _name " , name ) ;
}
prop . put ( " type_results_ " + line + " _line " , col ) ;
line + + ;
}
prop . put ( " type_results " , line ) ;
2007-04-12 10:58:19 +02:00
prop . put ( " type " , 3 ) ; // set type of result: image list
2007-04-11 16:46:54 +02:00
prop . put ( " input_cat " , " href " ) ;
prop . put ( " input_depth " , depth ) ;
2006-09-19 12:44:45 +02:00
}
2007-01-06 12:05:50 +01:00
2007-01-03 02:03:56 +01:00
// if user is not authenticated, he may not vote for URLs
2007-01-06 12:05:50 +01:00
int linkcount = Integer . parseInt ( prop . get ( " num-results_linkcount " , " 0 " ) ) ;
for ( int i = 0 ; i < linkcount ; i + + )
prop . put ( " type_results_ " + i + " _authorized " , ( authenticated ) ? 1 : 0 ) ;
2006-09-19 12:44:45 +02:00
2007-04-12 09:31:26 +02:00
prop . put ( " searchagain " , ( global ) ? 1 : 0 ) ;
2007-04-11 16:46:54 +02:00
prop . put ( " input " , input ) ;
2006-09-19 12:44:45 +02:00
prop . put ( " display " , display ) ;
2007-04-11 16:46:54 +02:00
prop . put ( " input_input " , input ) ;
prop . put ( " input_display " , display ) ;
2007-04-10 16:28:04 +02:00
prop . putASIS ( " input_promoteSearchPageGreeting " , promoteSearchPageGreeting ) ;
prop . put ( " input_former " , post . get ( " search " , " " ) ) ;
2007-04-26 16:28:57 +02:00
prop . put ( " former " , post . get ( " search " , " " ) ) ;
2007-04-10 16:28:04 +02:00
prop . put ( " input_count " , count ) ;
prop . put ( " input_resource " , ( global ) ? " global " : " local " ) ;
prop . put ( " input_time " , searchtime / 1000 ) ;
prop . put ( " input_urlmaskfilter " , urlmask ) ;
prop . put ( " input_prefermaskfilter " , prefermask ) ;
prop . put ( " input_indexof " , ( indexof ) ? " on " : " off " ) ;
prop . put ( " input_constraint " , constraint . exportB64 ( ) ) ;
prop . put ( " input_contentdom " , contentdomString ) ;
prop . put ( " input_contentdomCheckText " , ( contentdomCode = = plasmaSearchQuery . CONTENTDOM_TEXT ) ? 1 : 0 ) ;
prop . put ( " input_contentdomCheckAudio " , ( contentdomCode = = plasmaSearchQuery . CONTENTDOM_AUDIO ) ? 1 : 0 ) ;
prop . put ( " input_contentdomCheckVideo " , ( contentdomCode = = plasmaSearchQuery . CONTENTDOM_VIDEO ) ? 1 : 0 ) ;
prop . put ( " input_contentdomCheckImage " , ( contentdomCode = = plasmaSearchQuery . CONTENTDOM_IMAGE ) ? 1 : 0 ) ;
prop . put ( " input_contentdomCheckApp " , ( contentdomCode = = plasmaSearchQuery . CONTENTDOM_APP ) ? 1 : 0 ) ;
2007-04-13 20:01:02 +02:00
prop . put ( " type_former " , post . get ( " search " , " " ) ) ; //the query-string used to get the snippets
2007-04-11 16:46:54 +02:00
2007-04-10 16:28:04 +02:00
2006-09-19 12:44:45 +02:00
// return rewrite properties
return prop ;
}
}