2007-08-28 14:15:46 +02:00
// yacysearchitem.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 28.08.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
2009-09-27 00:07:40 +02:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2007-08-28 14:15:46 +02:00
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.UnsupportedEncodingException ;
import java.net.MalformedURLException ;
import java.net.URLEncoder ;
2007-09-06 15:26:38 +02:00
import java.util.ArrayList ;
2007-08-28 14:15:46 +02:00
import java.util.TreeSet ;
2009-10-11 02:12:19 +02:00
import net.yacy.kelondro.data.meta.DigestURI ;
2009-12-08 15:25:51 +01:00
import net.yacy.kelondro.util.EventTracker ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.Formatter ;
2009-10-11 23:29:18 +02:00
import de.anomic.http.server.HeaderFramework ;
import de.anomic.http.server.RequestHeader ;
2009-11-19 00:56:05 +01:00
import de.anomic.search.ContentDomain ;
2009-08-27 16:34:41 +02:00
import de.anomic.search.MediaSnippet ;
2009-07-09 00:14:57 +02:00
import de.anomic.search.QueryParams ;
2009-08-24 17:24:02 +02:00
import de.anomic.search.SearchEvent ;
2009-07-09 00:14:57 +02:00
import de.anomic.search.RankingProcess ;
2009-08-24 17:24:02 +02:00
import de.anomic.search.ResultEntry ;
import de.anomic.search.SearchEventCache ;
2009-07-19 22:37:44 +02:00
import de.anomic.search.Switchboard ;
2009-08-27 16:34:41 +02:00
import de.anomic.search.TextSnippet ;
2007-08-28 14:15:46 +02:00
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
import de.anomic.tools.crypt ;
import de.anomic.tools.nxTools ;
import de.anomic.yacy.yacyNewsPool ;
import de.anomic.yacy.yacySeed ;
2009-10-20 00:34:44 +02:00
import de.anomic.yacy.graphics.ProfilingGraph ;
2007-08-28 14:15:46 +02:00
public class yacysearchitem {
2007-09-07 13:45:38 +02:00
// Alternating column flag for media result rows: respond() emits it as the per-item "col" property,
// flips it after every media item and resets it to true whenever item 0 is rendered.
// NOTE(review): this is mutable static state shared by every call to respond(); concurrent
// searches would presumably interleave their toggles -- confirm whether that matters.
private static boolean col = true ;
// Maximum display length handed to shorten() for image/media names.
private static final int namelength = 60 ;
// Maximum display length handed to nxTools.shortenURLString() for result URLs.
private static final int urllength = 120 ;
2009-07-19 22:37:44 +02:00
public static serverObjects respond ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
final Switchboard sb = ( Switchboard ) env ;
2007-08-28 14:15:46 +02:00
final serverObjects prop = new serverObjects ( ) ;
2008-08-02 14:12:04 +02:00
final String eventID = post . get ( " eventID " , " " ) ;
final boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
final int item = post . getInt ( " item " , - 1 ) ;
2009-07-19 22:37:44 +02:00
final boolean auth = ( header . get ( HeaderFramework . CONNECTION_PROP_CLIENTIP , " " ) ) . equals ( " localhost " ) | | sb . verifyAuthentication ( header , true ) ;
2008-10-18 21:39:46 +02:00
final int display = ( post = = null ) ? 0 : post . getInt ( " display " , 0 ) ;
2007-08-28 14:15:46 +02:00
2007-09-09 14:30:18 +02:00
// default settings for blank item
2007-10-24 23:38:19 +02:00
prop . put ( " content " , " 0 " ) ;
prop . put ( " rss " , " 0 " ) ;
prop . put ( " references " , " 0 " ) ;
prop . put ( " rssreferences " , " 0 " ) ;
prop . put ( " dynamic " , " 0 " ) ;
2007-09-09 14:30:18 +02:00
2007-08-28 14:15:46 +02:00
// find search event
2009-08-24 17:24:02 +02:00
final SearchEvent theSearch = SearchEventCache . getEvent ( eventID ) ;
2007-09-09 14:30:18 +02:00
if ( theSearch = = null ) {
// the event does not exist, show empty page
return prop ;
}
2009-07-09 00:14:57 +02:00
final QueryParams theQuery = theSearch . getQuery ( ) ;
2008-03-23 01:55:04 +01:00
2007-09-04 01:43:55 +02:00
// dynamically update count values
2010-01-13 01:04:37 +01:00
final int totalcount = theSearch . getRankingResult ( ) . getLocalIndexCount ( ) + theSearch . getRankingResult ( ) . getRemoteResourceSize ( ) ;
2008-12-02 16:24:25 +01:00
final int offset = theQuery . neededResults ( ) - theQuery . displayResults ( ) + 1 ;
prop . put ( " offset " , offset ) ;
2009-11-24 12:13:11 +01:00
prop . put ( " itemscount " , Formatter . number ( Math . min ( ( item < 0 ) ? theQuery . neededResults ( ) : item + 1 , totalcount ) ) ) ;
prop . put ( " totalcount " , Formatter . number ( totalcount , true ) ) ;
2010-01-13 01:04:37 +01:00
prop . put ( " localResourceSize " , Formatter . number ( theSearch . getRankingResult ( ) . getLocalIndexCount ( ) , true ) ) ;
2009-01-31 02:06:56 +01:00
prop . put ( " remoteResourceSize " , Formatter . number ( theSearch . getRankingResult ( ) . getRemoteResourceSize ( ) , true ) ) ;
prop . put ( " remoteIndexCount " , Formatter . number ( theSearch . getRankingResult ( ) . getRemoteIndexCount ( ) , true ) ) ;
prop . put ( " remotePeerCount " , Formatter . number ( theSearch . getRankingResult ( ) . getRemotePeerCount ( ) , true ) ) ;
2007-08-28 14:15:46 +02:00
2009-11-19 00:56:05 +01:00
if ( theQuery . contentdom = = ContentDomain . TEXT ) {
2007-09-06 15:26:38 +02:00
// text search
2008-02-21 15:53:51 +01:00
// generate result object
2009-08-24 17:24:02 +02:00
final ResultEntry result = theSearch . oneResult ( item ) ;
2008-02-21 15:53:51 +01:00
if ( result = = null ) return prop ; // no content
2008-12-02 16:24:25 +01:00
2008-02-21 15:53:51 +01:00
2008-08-02 14:12:04 +02:00
final int port = result . url ( ) . getPort ( ) ;
2009-10-11 02:12:19 +02:00
DigestURI faviconURL = null ;
2009-03-06 16:39:02 +01:00
if ( ! result . url ( ) . isLocal ( ) ) try {
2010-01-12 21:53:19 +01:00
faviconURL = new DigestURI ( result . url ( ) . getProtocol ( ) + " :// " + result . url ( ) . getHost ( ) + ( ( port ! = - 1 ) ? ( " : " + port ) : " " ) + " /favicon.ico " , null ) ;
2008-08-02 14:12:04 +02:00
} catch ( final MalformedURLException e1 ) {
2007-09-06 15:26:38 +02:00
faviconURL = null ;
}
2008-12-02 16:24:25 +01:00
prop . put ( " content " , 1 ) ; // switch on specific content
prop . put ( " content_authorized " , authenticated ? " 1 " : " 0 " ) ;
2009-05-28 16:26:05 +02:00
prop . put ( " content_authorized_recommend " , ( sb . peers . newsPool . getSpecific ( yacyNewsPool . OUTGOING_DB , yacyNewsPool . CATEGORY_SURFTIPP_ADD , " url " , result . urlstring ( ) ) = = null ) ? " 1 " : " 0 " ) ;
2008-12-02 16:24:25 +01:00
prop . putHTML ( " content_authorized_recommend_deletelink " , " /yacysearch.html?search= " + theQuery . queryString + " &Enter=Search&count= " + theQuery . displayResults ( ) + " &offset= " + ( theQuery . neededResults ( ) - theQuery . displayResults ( ) ) + " &order= " + crypt . simpleEncode ( theQuery . ranking . toExternalString ( ) ) + " &resource=local&time=3&deleteref= " + result . hash ( ) + " &urlmaskfilter=.* " ) ;
prop . putHTML ( " content_authorized_recommend_recommendlink " , " /yacysearch.html?search= " + theQuery . queryString + " &Enter=Search&count= " + theQuery . displayResults ( ) + " &offset= " + ( theQuery . neededResults ( ) - theQuery . displayResults ( ) ) + " &order= " + crypt . simpleEncode ( theQuery . ranking . toExternalString ( ) ) + " &resource=local&time=3&recommendref= " + result . hash ( ) + " &urlmaskfilter=.* " ) ;
prop . put ( " content_authorized_urlhash " , result . hash ( ) ) ;
prop . putHTML ( " content_title " , result . title ( ) ) ;
2009-01-17 00:22:42 +01:00
prop . putXML ( " content_title-xml " , result . title ( ) ) ;
2009-04-26 21:17:36 +02:00
prop . putJSON ( " content_title-json " , result . title ( ) ) ;
2008-12-02 16:24:25 +01:00
prop . putHTML ( " content_link " , result . urlstring ( ) ) ;
prop . put ( " content_display " , display ) ;
2007-10-24 23:38:19 +02:00
prop . putHTML ( " content_faviconCode " , sb . licensedURLs . aquireLicense ( faviconURL ) ) ; // aquire license for favicon url loading
2007-09-06 15:26:38 +02:00
prop . put ( " content_urlhash " , result . hash ( ) ) ;
prop . put ( " content_urlhexhash " , yacySeed . b64Hash2hexHash ( result . hash ( ) ) ) ;
2007-10-24 23:38:19 +02:00
prop . putHTML ( " content_urlname " , nxTools . shortenURLString ( result . urlname ( ) , urllength ) ) ;
2009-07-19 22:37:44 +02:00
prop . put ( " content_date " , Switchboard . dateString ( result . modified ( ) ) ) ;
prop . put ( " content_date822 " , Switchboard . dateString822 ( result . modified ( ) ) ) ;
2009-07-09 00:14:57 +02:00
prop . put ( " content_ybr " , RankingProcess . ybr ( result . hash ( ) ) ) ;
2009-02-23 12:39:20 +01:00
prop . putHTML ( " content_size " , Integer . toString ( result . filesize ( ) ) ) ; // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
2009-02-23 08:59:11 +01:00
prop . putHTML ( " content_sizename " , sizename ( result . filesize ( ) ) ) ;
2009-02-23 12:39:20 +01:00
prop . putHTML ( " content_host " , result . url ( ) . getHost ( ) ) ;
prop . putHTML ( " content_file " , result . url ( ) . getFile ( ) ) ;
prop . putHTML ( " content_path " , result . url ( ) . getPath ( ) ) ;
2009-01-07 00:16:10 +01:00
prop . put ( " content_nl " , ( item = = 0 ) ? 0 : 1 ) ;
2008-08-02 14:12:04 +02:00
final TreeSet < String > [ ] query = theQuery . queryWords ( ) ;
2009-10-11 02:12:19 +02:00
DigestURI wordURL = null ;
2007-09-06 15:26:38 +02:00
try {
2007-10-24 23:38:19 +02:00
prop . putHTML ( " content_words " , URLEncoder . encode ( query [ 0 ] . toString ( ) , " UTF-8 " ) ) ;
2008-08-02 14:12:04 +02:00
} catch ( final UnsupportedEncodingException e ) { }
2007-10-24 23:38:19 +02:00
prop . putHTML ( " content_former " , theQuery . queryString ) ;
2009-10-11 02:12:19 +02:00
prop . put ( " content_rankingprops " , result . word ( ) . toPropertyForm ( ) + " , domLengthEstimated= " + DigestURI . domLengthEstimation ( result . hash ( ) ) +
( ( DigestURI . probablyRootURL ( result . hash ( ) ) ) ? " , probablyRootURL " : " " ) +
( ( ( wordURL = DigestURI . probablyWordURL ( result . hash ( ) , query [ 0 ] ) ) ! = null ) ? " , probablyWordURL= " + wordURL . toNormalform ( false , true ) : " " ) ) ;
2009-08-27 16:34:41 +02:00
final TextSnippet snippet = result . textSnippet ( ) ;
2009-04-27 14:52:12 +02:00
final String desc = ( snippet = = null ) ? " " : snippet . getLineMarked ( theQuery . fullqueryHashes ) ;
prop . put ( " content_description " , desc ) ;
prop . putXML ( " content_description-xml " , desc ) ;
prop . putJSON ( " content_description-json " , desc ) ;
2009-12-08 15:25:51 +01:00
EventTracker . update ( " SEARCH " , new ProfilingGraph . searchEvent ( theQuery . id ( true ) , SearchEvent . FINALIZATION + " - " + item , 0 , 0 ) , false , 30000 , ProfilingGraph . maxTime ) ;
2008-03-23 01:55:04 +01:00
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-06 15:26:38 +02:00
}
2009-11-19 00:56:05 +01:00
if ( theQuery . contentdom = = ContentDomain . IMAGE ) {
2007-09-06 15:26:38 +02:00
// image search; shows thumbnails
2008-02-21 15:53:51 +01:00
2009-11-19 00:56:05 +01:00
prop . put ( " content " , theQuery . contentdom . getCode ( ) + 1 ) ; // switch on specific content
2009-08-27 16:34:41 +02:00
final MediaSnippet ms = theSearch . result ( ) . oneImage ( item ) ;
2008-02-21 15:53:51 +01:00
if ( ms = = null ) {
2009-11-23 17:10:50 +01:00
prop . put ( " content_item " , " 0 " ) ;
2008-02-21 15:53:51 +01:00
} else {
2009-11-23 17:10:50 +01:00
prop . putHTML ( " content_item_hrefCache " , ( auth ) ? " /ViewImage.png?url= " + ms . href . toNormalform ( true , false ) : ms . href . toNormalform ( true , false ) ) ;
prop . putHTML ( " content_item_href " , ms . href . toNormalform ( true , false ) ) ;
prop . put ( " content_item_code " , sb . licensedURLs . aquireLicense ( ms . href ) ) ;
prop . putHTML ( " content_item_name " , shorten ( ms . name , namelength ) ) ;
2009-11-24 12:13:11 +01:00
prop . put ( " content_item_mimetype " , ms . mime ) ;
2009-11-23 17:10:50 +01:00
prop . put ( " content_item_fileSize " , ms . fileSize ) ;
prop . put ( " content_item_width " , ms . width ) ;
prop . put ( " content_item_height " , ms . height ) ;
prop . put ( " content_item_attr " , ( ms . attr . equals ( " -1 x -1 " ) ) ? " " : " ( " + ms . attr + " ) " ) ; // attributes, here: original size of image
prop . put ( " content_item_urlhash " , ms . source . hash ( ) ) ;
prop . put ( " content_item_source " , ms . source . toNormalform ( true , false ) ) ;
2009-12-10 00:58:56 +01:00
prop . putXML ( " content_item_source-xml " , ms . source . toNormalform ( true , false ) ) ;
2009-11-23 17:10:50 +01:00
prop . put ( " content_item_sourcedom " , ms . source . getHost ( ) ) ;
prop . put ( " content_item_nl " , ( item = = 0 ) ? 0 : 1 ) ;
prop . put ( " content_item " , 1 ) ;
2007-09-07 13:45:38 +02:00
}
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-07 13:45:38 +02:00
}
2009-11-19 00:56:05 +01:00
if ( ( theQuery . contentdom = = ContentDomain . AUDIO ) | |
( theQuery . contentdom = = ContentDomain . VIDEO ) | |
( theQuery . contentdom = = ContentDomain . APP ) ) {
2007-09-07 13:45:38 +02:00
// any other media content
2008-02-21 15:53:51 +01:00
// generate result object
2009-08-24 17:24:02 +02:00
final ResultEntry result = theSearch . oneResult ( item ) ;
2008-02-21 15:53:51 +01:00
if ( result = = null ) return prop ; // no content
2009-11-19 00:56:05 +01:00
prop . put ( " content " , theQuery . contentdom . getCode ( ) + 1 ) ; // switch on specific content
2009-08-27 16:34:41 +02:00
final ArrayList < MediaSnippet > media = result . mediaSnippets ( ) ;
2007-09-07 13:45:38 +02:00
if ( item = = 0 ) col = true ;
if ( media ! = null ) {
2009-08-27 16:34:41 +02:00
MediaSnippet ms ;
2007-09-07 13:45:38 +02:00
int c = 0 ;
for ( int i = 0 ; i < media . size ( ) ; i + + ) {
2008-06-06 18:01:27 +02:00
ms = media . get ( i ) ;
2008-01-22 12:51:43 +01:00
prop . putHTML ( " content_items_ " + i + " _href " , ms . href . toNormalform ( true , false ) ) ;
prop . putHTML ( " content_items_ " + i + " _hrefshort " , nxTools . shortenURLString ( ms . href . toNormalform ( true , false ) , urllength ) ) ;
2007-10-24 23:38:19 +02:00
prop . putHTML ( " content_items_ " + i + " _name " , shorten ( ms . name , namelength ) ) ;
prop . put ( " content_items_ " + i + " _col " , ( col ) ? " 0 " : " 1 " ) ;
2007-09-07 13:45:38 +02:00
c + + ;
col = ! col ;
}
prop . put ( " content_items " , c ) ;
} else {
2007-10-24 23:38:19 +02:00
prop . put ( " content_items " , " 0 " ) ;
2007-09-06 15:26:38 +02:00
}
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-06 15:26:38 +02:00
}
2007-09-04 01:43:55 +02:00
2007-08-28 14:15:46 +02:00
return prop ;
}
2008-08-02 14:12:04 +02:00
private static String shorten ( final String s , final int length ) {
2007-09-07 13:45:38 +02:00
if ( s . length ( ) < = length ) return s ;
2008-08-02 14:12:04 +02:00
final int p = s . lastIndexOf ( '.' ) ;
2007-09-07 13:45:38 +02:00
if ( p < 0 ) return s . substring ( 0 , length - 3 ) + " ... " ;
2009-11-20 15:35:33 +01:00
return s . substring ( 0 , length - ( s . length ( ) - p ) - 3 ) + " ... " + s . substring ( p ) ; // TODO check oob
2007-09-07 13:45:38 +02:00
}
2009-02-23 08:59:11 +01:00
/**
 * Formats a byte count as a whole number followed by a unit label.
 * The value is divided by 1024 (integer division) until it drops below 1024
 * or the largest unit is reached.
 *
 * @param size the size in bytes
 * @return the scaled value followed by the bytes, kbyte, mbyte or gbyte label
 */
private static String sizename(int size) {
    final String[] smallerUnits = { " bytes ", " kbyte ", " mbyte " };
    int scaled = size;
    for (final String unit : smallerUnits) {
        if (scaled < 1024) {
            return scaled + unit;
        }
        scaled /= 1024;
    }
    // whatever is left after three divisions is reported in the largest unit
    return scaled + " gbyte ";
}
2008-02-03 03:23:04 +01:00
2007-08-28 14:15:46 +02:00
}