2007-08-28 14:15:46 +02:00
// yacysearchitem.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 28.08.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
2009-09-27 00:07:40 +02:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2007-08-28 14:15:46 +02:00
//
// LICENSE
2011-06-13 23:44:03 +02:00
//
2007-08-28 14:15:46 +02:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2016-02-02 09:57:54 +01:00
import java.awt.Dimension ;
2015-06-07 20:37:37 +02:00
import java.io.File ;
2016-10-12 09:16:47 +02:00
import java.io.UnsupportedEncodingException ;
2007-08-28 14:15:46 +02:00
import java.net.MalformedURLException ;
2016-10-12 09:16:47 +02:00
import java.net.URLEncoder ;
import java.nio.charset.StandardCharsets ;
2014-11-28 01:19:31 +01:00
import java.util.Collection ;
2015-03-02 18:00:20 +01:00
import java.util.Date ;
2013-11-26 02:24:47 +01:00
import java.util.Iterator ;
2013-12-04 22:44:51 +01:00
2011-01-03 21:52:54 +01:00
import net.yacy.cora.date.GenericFormatter ;
2014-09-12 02:10:18 +02:00
import net.yacy.cora.date.ISO8601Formatter ;
2012-11-21 18:46:49 +01:00
import net.yacy.cora.document.analysis.Classification ;
import net.yacy.cora.document.analysis.Classification.ContentDomain ;
2013-09-15 00:30:23 +02:00
import net.yacy.cora.document.encoding.ASCII ;
import net.yacy.cora.document.feed.RSSMessage ;
import net.yacy.cora.document.id.DigestURL ;
import net.yacy.cora.document.id.MultiProtocolURL ;
2010-08-23 14:32:02 +02:00
import net.yacy.cora.protocol.HeaderFramework ;
import net.yacy.cora.protocol.RequestHeader ;
2011-10-13 00:26:48 +02:00
import net.yacy.cora.protocol.RequestHeader.FileType ;
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2013-12-04 22:44:51 +01:00
import net.yacy.cora.util.Memory ;
2013-06-09 08:15:23 +02:00
import net.yacy.crawler.data.Cache ;
2015-06-07 20:37:37 +02:00
import net.yacy.crawler.data.Transactions ;
import net.yacy.crawler.data.Transactions.State ;
2015-05-26 04:15:00 +02:00
import net.yacy.crawler.retrieval.Response ;
2013-02-13 00:33:53 +01:00
import net.yacy.data.URLLicense ;
2016-02-02 09:57:54 +01:00
import net.yacy.document.parser.html.IconEntry ;
2014-11-28 01:19:31 +01:00
import net.yacy.kelondro.data.meta.URIMetadataNode ;
2009-10-10 03:14:19 +02:00
import net.yacy.kelondro.util.Formatter ;
2011-10-04 11:06:24 +02:00
import net.yacy.peers.NewsPool ;
import net.yacy.peers.Seed ;
2011-09-25 18:59:06 +02:00
import net.yacy.peers.graphics.ProfilingGraph ;
2012-05-04 17:28:27 +02:00
import net.yacy.search.EventTracker ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
import net.yacy.search.SwitchboardConstants ;
2012-11-01 17:16:43 +01:00
import net.yacy.search.query.HeuristicResult ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.query.QueryParams ;
import net.yacy.search.query.SearchEvent ;
import net.yacy.search.query.SearchEventCache ;
2012-11-01 17:16:43 +01:00
import net.yacy.search.query.SearchEventType ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.snippet.TextSnippet ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
import net.yacy.server.serverSwitch ;
import net.yacy.utils.crypt ;
import net.yacy.utils.nxTools ;
2016-02-09 20:46:44 +01:00
import net.yacy.visualization.ImageViewer ;
2007-08-28 14:15:46 +02:00
public class yacysearchitem {
2011-06-13 23:44:03 +02:00
2011-03-08 23:37:17 +01:00
// Suffix constant and its cached length. Not referenced in the visible part of this file;
// presumably consumed by the shorten(...) helper defined further down — TODO confirm.
private static final String SHORTEN_SUFFIX = " ... " ;
private static final int SHORTEN_SUFFIX_LENGTH = SHORTEN_SUFFIX . length ( ) ;
// Length limit handed to shorten(...) for the displayed name of media (audio/video/app) items.
private static final int MAX_NAME_LENGTH = 60 ;
// Length limit handed to nxTools.shortenURLString(...) for displayed result URLs.
private static final int MAX_URL_LENGTH = 120 ;
2015-10-21 02:49:51 +02:00
/** Default image item width in pixels (used as maxwidth of the ViewImage thumbnail URL) */
private static final int DEFAULT_IMG_WIDTH = 128 ;
/** Default image item height in pixels (same as the default width; used as maxheight of the thumbnail URL) */
private static final int DEFAULT_IMG_HEIGHT = DEFAULT_IMG_WIDTH ;
2007-08-28 14:15:46 +02:00
2012-05-16 13:42:32 +02:00
//private static boolean col = true;
2011-06-13 23:44:03 +02:00
2009-07-19 22:37:44 +02:00
public static serverObjects respond ( final RequestHeader header , final serverObjects post , final serverSwitch env ) {
final Switchboard sb = ( Switchboard ) env ;
2007-08-28 14:15:46 +02:00
final serverObjects prop = new serverObjects ( ) ;
2011-06-13 23:44:03 +02:00
2008-08-02 14:12:04 +02:00
final String eventID = post . get ( " eventID " , " " ) ;
2015-10-21 02:49:51 +02:00
final boolean authenticated = sb . verifyAuthentication ( header ) ;
2008-08-02 14:12:04 +02:00
final int item = post . getInt ( " item " , - 1 ) ;
2011-10-13 00:26:48 +02:00
final RequestHeader . FileType fileType = header . fileType ( ) ;
2011-03-08 23:37:17 +01:00
2007-09-09 14:30:18 +02:00
// default settings for blank item
2007-10-24 23:38:19 +02:00
prop . put ( " content " , " 0 " ) ;
prop . put ( " rss " , " 0 " ) ;
prop . put ( " references " , " 0 " ) ;
prop . put ( " rssreferences " , " 0 " ) ;
prop . put ( " dynamic " , " 0 " ) ;
2016-12-20 14:52:33 +01:00
prop . put ( " localQuery " , " 0 " ) ;
2017-03-08 10:27:18 +01:00
prop . put ( " statistics " , " 0 " ) ;
2011-06-13 23:44:03 +02:00
2007-08-28 14:15:46 +02:00
// find search event
2009-08-24 17:24:02 +02:00
final SearchEvent theSearch = SearchEventCache . getEvent ( eventID ) ;
2007-09-09 14:30:18 +02:00
if ( theSearch = = null ) {
// the event does not exist, show empty page
return prop ;
}
2011-06-13 23:44:03 +02:00
2007-09-04 01:43:55 +02:00
// dynamically update count values
2017-03-08 10:27:18 +01:00
prop . put ( " statistics " , " 1 " ) ;
prop . put ( " statistics_offset " , theSearch . query . neededResults ( ) - theSearch . query . itemsPerPage ( ) + 1 ) ;
prop . put ( " statistics_itemscount " , Formatter . number ( Math . min ( ( item < 0 ) ? theSearch . query . neededResults ( ) : item + 1 , theSearch . getResultCount ( ) ) ) ) ;
prop . put ( " statistics_itemsperpage " , Formatter . number ( theSearch . query . itemsPerPage ) ) ;
prop . put ( " statistics_totalcount " , Formatter . number ( theSearch . getResultCount ( ) , true ) ) ;
prop . put ( " statistics_localResourceSize " , Formatter . number ( theSearch . local_rwi_stored . get ( ) + theSearch . local_solr_stored . get ( ) , true ) ) ;
prop . put ( " statistics_remoteResourceSize " , Formatter . number ( theSearch . remote_rwi_stored . get ( ) + theSearch . remote_solr_stored . get ( ) , true ) ) ;
prop . put ( " statistics_remoteIndexCount " , Formatter . number ( theSearch . remote_rwi_available . get ( ) + theSearch . remote_solr_available . get ( ) , true ) ) ;
prop . put ( " statistics_remotePeerCount " , Formatter . number ( theSearch . remote_rwi_peerCount . get ( ) + theSearch . remote_solr_peerCount . get ( ) , true ) ) ;
prop . put ( " statistics_navurlBase " , QueryParams . navurlBase ( RequestHeader . FileType . HTML , theSearch . query , null , false ) . toString ( ) ) ;
prop . put ( " statistics_localQuery " , theSearch . query . isLocal ( ) ? " 1 " : " 0 " ) ;
2012-01-26 15:32:30 +01:00
final String target_special_pattern = sb . getConfig ( SwitchboardConstants . SEARCH_TARGET_SPECIAL_PATTERN , " " ) ;
2017-02-28 18:11:54 +01:00
final boolean noreferrer = sb . getConfigBool ( SwitchboardConstants . SEARCH_RESULT_NOREFERRER , SwitchboardConstants . SEARCH_RESULT_NOREFERRER_DEFAULT ) ;
2011-06-13 23:44:03 +02:00
2013-03-19 11:23:18 +01:00
long timeout = item = = 0 ? 10000 : ( theSearch . query . isLocal ( ) ? 1000 : 3000 ) ;
2012-11-05 03:19:28 +01:00
if ( theSearch . query . contentdom = = Classification . ContentDomain . TEXT | | theSearch . query . contentdom = = Classification . ContentDomain . ALL ) {
2007-09-06 15:26:38 +02:00
// text search
2008-02-21 15:53:51 +01:00
// generate result object
2015-05-26 04:15:00 +02:00
final URIMetadataNode result = theSearch . oneResult ( item , timeout ) ;
2008-02-21 15:53:51 +01:00
if ( result = = null ) return prop ; // no content
2012-01-26 15:32:30 +01:00
final String resultUrlstring = result . urlstring ( ) ;
2013-09-15 00:30:23 +02:00
final DigestURL resultURL = result . url ( ) ;
2012-01-26 15:32:30 +01:00
final String target = sb . getConfig ( resultUrlstring . matches ( target_special_pattern ) ? SwitchboardConstants . SEARCH_TARGET_SPECIAL : SwitchboardConstants . SEARCH_TARGET_DEFAULT , " _self " ) ;
2012-11-05 03:19:28 +01:00
final String resource = theSearch . query . domType . toString ( ) ;
2014-01-20 00:58:17 +01:00
final String origQ = theSearch . query . getQueryGoal ( ) . getQueryString ( true ) ;
2008-12-02 16:24:25 +01:00
prop . put ( " content " , 1 ) ; // switch on specific content
prop . put ( " content_authorized " , authenticated ? " 1 " : " 0 " ) ;
2011-11-24 15:57:09 +01:00
final String urlhash = ASCII . String ( result . hash ( ) ) ;
2015-01-01 02:41:20 +01:00
if ( authenticated ) { // only needed if authorized
2016-10-12 09:31:42 +02:00
addAuthorizedActions ( sb , prop , theSearch , resultUrlstring , resource , origQ , urlhash ) ;
2015-01-01 02:41:20 +01:00
}
2008-12-02 16:24:25 +01:00
prop . putHTML ( " content_title " , result . title ( ) ) ;
2009-01-17 00:22:42 +01:00
prop . putXML ( " content_title-xml " , result . title ( ) ) ;
2009-04-26 21:17:36 +02:00
prop . putJSON ( " content_title-json " , result . title ( ) ) ;
2012-01-26 15:32:30 +01:00
prop . putHTML ( " content_showPictures_link " , resultUrlstring ) ;
2012-06-10 10:51:53 +02:00
//prop.putHTML("content_link", resultUrlstring);
// START interaction
2014-12-25 02:16:19 +01:00
if ( sb . getConfigBool ( " proxyURL.useforresults " , false ) & & sb . getConfigBool ( " proxyURL " , false ) ) {
String modifyURL = resultUrlstring ;
// check if url is allowed to view
final String tmprewritecfg = sb . getConfig ( " proxyURL.rewriteURLs " , " all " ) ;
if ( tmprewritecfg . equals ( " all " ) ) {
modifyURL = " ./proxy.html?url= " + resultUrlstring ;
} else if ( tmprewritecfg . equals ( " domainlist " ) ) { // check if url is allowed to view
try {
if ( sb . crawlStacker . urlInAcceptedDomain ( new DigestURL ( resultUrlstring ) ) = = null ) {
modifyURL = " ./proxy.html?url= " + resultUrlstring ;
}
} catch ( final MalformedURLException e ) {
ConcurrentLog . logException ( e ) ;
}
} else if ( tmprewritecfg . equals ( " yacy " ) ) {
try {
if ( ( new DigestURL ( resultUrlstring ) . getHost ( ) . endsWith ( " .yacy " ) ) ) {
modifyURL = " ./proxy.html?url= " + resultUrlstring ;
}
} catch ( final MalformedURLException e ) {
ConcurrentLog . logException ( e ) ;
}
}
prop . putXML ( " content_link " , modifyURL ) ; // putXML for rss
} else {
prop . putXML ( " content_link " , resultUrlstring ) ; // putXML for rss
}
2017-02-28 18:11:54 +01:00
prop . put ( " content_noreferrer " , noreferrer ? 1 : 0 ) ;
2014-12-25 02:16:19 +01:00
2012-06-10 10:51:53 +02:00
// END interaction
2014-09-12 02:10:18 +02:00
boolean isAtomFeed = header . get ( HeaderFramework . CONNECTION_PROP_EXT , " " ) . equals ( " atom " ) ;
2013-06-25 16:27:20 +02:00
String resultFileName = resultURL . getFileName ( ) ;
2011-02-02 01:50:06 +01:00
prop . putHTML ( " content_target " , target ) ;
2016-02-04 08:14:49 +01:00
DigestURL faviconURL = null ;
2017-01-06 09:00:28 +01:00
if ( ( fileType = = FileType . HTML | | fileType = = FileType . JSON ) & & ( resultURL . isHTTP ( ) | | resultURL . isHTTPS ( ) ) ) {
2016-02-04 08:14:49 +01:00
faviconURL = getFaviconURL ( result , new Dimension ( 16 , 16 ) ) ;
}
2017-01-06 09:00:28 +01:00
if ( faviconURL = = null ) {
prop . put ( " content_favicon " , 0 ) ;
} else {
prop . put ( " content_favicon " , 1 ) ;
}
prop . putHTML ( " content_favicon_faviconUrl " , processFaviconURL ( ImageViewer . hasFullViewingRights ( header , sb ) , faviconURL ) ) ;
2014-09-12 02:10:18 +02:00
prop . put ( " content_urlhash " , urlhash ) ;
2014-05-22 03:01:07 +02:00
prop . put ( " content_ranking " , Float . toString ( result . score ( ) ) ) ;
2015-03-02 18:00:20 +01:00
Date [ ] events = result . events ( ) ;
boolean showEvent = events ! = null & & events . length > 0 & & sb . getConfig ( " search.navigation " , " " ) . indexOf ( " date " , 0 ) > = 0 ;
prop . put ( " content_showEvent " , showEvent ? 1 : 0 ) ;
2015-06-07 20:37:37 +02:00
Collection < File > snapshotPaths = sb . getConfigBool ( " search.result.show.snapshots " , true ) ? Transactions . findPaths ( result . url ( ) , null , State . ANY ) : null ;
2014-09-12 02:10:18 +02:00
if ( fileType = = FileType . HTML ) { // html template specific settings
2015-03-02 18:00:20 +01:00
prop . put ( " content_showDate " , sb . getConfigBool ( " search.result.show.date " , true ) & & ! showEvent ? 1 : 0 ) ;
2014-09-12 02:10:18 +02:00
prop . put ( " content_showSize " , sb . getConfigBool ( " search.result.show.size " , true ) ? 1 : 0 ) ;
prop . put ( " content_showMetadata " , sb . getConfigBool ( " search.result.show.metadata " , true ) ? 1 : 0 ) ;
prop . put ( " content_showParser " , sb . getConfigBool ( " search.result.show.parser " , true ) ? 1 : 0 ) ;
prop . put ( " content_showCitation " , sb . getConfigBool ( " search.result.show.citation " , true ) ? 1 : 0 ) ;
prop . put ( " content_showPictures " , sb . getConfigBool ( " search.result.show.pictures " , true ) ? 1 : 0 ) ;
prop . put ( " content_showCache " , sb . getConfigBool ( " search.result.show.cache " , true ) & & Cache . has ( resultURL . hash ( ) ) ? 1 : 0 ) ;
2015-04-14 02:07:02 +02:00
prop . put ( " content_showProxy " , sb . getConfigBool ( " search.result.show.proxy " , true ) & & sb . getConfigBool ( " proxyURL " , false ) ? 1 : 0 ) ;
2014-09-12 02:10:18 +02:00
prop . put ( " content_showHostBrowser " , sb . getConfigBool ( " search.result.show.hostbrowser " , true ) ? 1 : 0 ) ;
2015-06-07 20:37:37 +02:00
prop . put ( " content_showSnapshots " , snapshotPaths ! = null & & snapshotPaths . size ( ) > 0 & & sb . getConfigBool ( " search.result.show.snapshots " , true ) ? 1 : 0 ) ;
2014-11-28 01:19:31 +01:00
prop . put ( " content_showVocabulary " , sb . getConfigBool ( " search.result.show.vocabulary " , true ) ? 1 : 0 ) ;
2014-09-12 02:10:18 +02:00
2015-03-02 18:00:20 +01:00
if ( showEvent ) prop . put ( " content_showEvent_date " , GenericFormatter . RFC1123_SHORT_FORMATTER . format ( events [ 0 ] ) ) ;
2015-05-25 21:28:48 +02:00
prop . put ( " content_showDate_date " , GenericFormatter . RFC1123_SHORT_FORMATTER . format ( result . moddate ( ) ) ) ;
2014-09-12 02:10:18 +02:00
prop . putHTML ( " content_showSize_sizename " , RSSMessage . sizename ( result . filesize ( ) ) ) ;
prop . put ( " content_showMetadata_urlhash " , urlhash ) ;
prop . put ( " content_showParser_urlhash " , urlhash ) ;
prop . put ( " content_showCitation_urlhash " , urlhash ) ;
prop . putHTML ( " content_showPictures_former " , origQ ) ;
prop . put ( " content_showCache_link " , resultUrlstring ) ;
prop . put ( " content_showProxy_link " , resultUrlstring ) ;
prop . put ( " content_showHostBrowser_link " , resultUrlstring ) ;
2014-11-28 01:19:31 +01:00
if ( sb . getConfigBool ( " search.result.show.vocabulary " , true ) ) {
int c = 0 ;
2015-05-26 04:15:00 +02:00
for ( String key : result . getFieldNames ( ) ) {
2014-11-28 01:19:31 +01:00
if ( key . startsWith ( " vocabulary_ " ) & & key . endsWith ( " _sxt " ) ) {
2015-05-26 04:15:00 +02:00
Collection < Object > terms = result . getFieldValues ( key ) ;
2014-11-28 01:19:31 +01:00
prop . putHTML ( " content_showVocabulary_vocabulary_ " + c + " _name " , key . substring ( 11 , key . length ( ) - 4 ) ) ;
prop . putHTML ( " content_showVocabulary_vocabulary_ " + c + " _terms " , terms . toString ( ) ) ;
c + + ;
}
}
prop . put ( " content_showVocabulary_vocabulary " , c ) ;
prop . put ( " content_showVocabulary " , 1 ) ;
} else {
prop . put ( " content_showVocabulary_vocabulary " , 0 ) ;
prop . put ( " content_showVocabulary " , 0 ) ;
}
2015-06-07 20:37:37 +02:00
if ( snapshotPaths ! = null & & snapshotPaths . size ( ) > 0 ) {
prop . put ( " content_showSnapshots_link " , snapshotPaths . iterator ( ) . next ( ) . getAbsolutePath ( ) ) ;
}
2014-09-12 02:10:18 +02:00
}
prop . put ( " content_urlhexhash " , Seed . b64Hash2hexHash ( urlhash ) ) ;
2011-03-08 23:37:17 +01:00
prop . putHTML ( " content_urlname " , nxTools . shortenURLString ( result . urlname ( ) , MAX_URL_LENGTH ) ) ;
2015-05-25 21:28:48 +02:00
prop . put ( " content_date822 " , isAtomFeed ? ISO8601Formatter . FORMATTER . format ( result . moddate ( ) ) : HeaderFramework . formatRFC1123 ( result . moddate ( ) ) ) ;
2015-03-02 18:00:20 +01:00
if ( showEvent ) prop . put ( " content_showEvent_date822 " , isAtomFeed ? ISO8601Formatter . FORMATTER . format ( events [ 0 ] ) : HeaderFramework . formatRFC1123 ( events [ 0 ] ) ) ;
2010-05-04 22:57:09 +02:00
//prop.put("content_ybr", RankingProcess.ybr(result.hash()));
2009-02-23 12:39:20 +01:00
prop . putHTML ( " content_size " , Integer . toString ( result . filesize ( ) ) ) ; // we don't use putNUM here because that number shall be usable as sorting key. To print the size, use 'sizename'
2014-09-12 02:10:18 +02:00
prop . putHTML ( " content_sizename " , RSSMessage . sizename ( result . filesize ( ) ) ) ;
2010-12-01 19:48:21 +01:00
prop . putHTML ( " content_host " , resultURL . getHost ( ) = = null ? " " : resultURL . getHost ( ) ) ;
2014-06-07 03:01:26 +02:00
prop . putXML ( " content_file " , resultFileName ) ; // putXML for rss
prop . putXML ( " content_path " , resultURL . getPath ( ) ) ; // putXML for rss
2012-11-05 03:19:28 +01:00
prop . put ( " content_nl " , ( item = = theSearch . query . offset ) ? 0 : 1 ) ;
2015-05-25 21:28:48 +02:00
prop . putHTML ( " content_publisher " , result . dc_publisher ( ) ) ;
prop . putHTML ( " content_creator " , result . dc_creator ( ) ) ; // author
prop . putHTML ( " content_subject " , result . dc_subject ( ) ) ;
2013-11-26 02:24:47 +01:00
final Iterator < String > query = theSearch . query . getQueryGoal ( ) . getIncludeStrings ( ) ;
final StringBuilder s = new StringBuilder ( theSearch . query . getQueryGoal ( ) . getIncludeSize ( ) * 20 ) ;
while ( query . hasNext ( ) ) s . append ( '+' ) . append ( query . next ( ) ) ;
2011-03-08 23:37:17 +01:00
final String words = ( s . length ( ) > 0 ) ? s . substring ( 1 ) : " " ;
prop . putHTML ( " content_words " , words ) ;
prop . putHTML ( " content_showParser_words " , words ) ;
2012-12-15 00:05:46 +01:00
prop . putHTML ( " content_former " , origQ ) ;
2009-08-27 16:34:41 +02:00
final TextSnippet snippet = result . textSnippet ( ) ;
2014-07-28 15:42:57 +02:00
final String desc = ( snippet = = null ) ? " " : snippet . descriptionline ( theSearch . query . getQueryGoal ( ) ) ;
2015-02-14 02:43:05 +01:00
prop . put ( " content_description " , desc ) ;
2009-04-27 14:52:12 +02:00
prop . putXML ( " content_description-xml " , desc ) ;
prop . putJSON ( " content_description-json " , desc ) ;
2015-05-25 21:28:48 +02:00
prop . put ( " content_mimetype " , result . mime ( ) ) ; // for atom <link> type attribute
2012-11-01 17:16:43 +01:00
final HeuristicResult heuristic = theSearch . getHeuristic ( result . hash ( ) ) ;
2010-06-27 23:38:16 +02:00
if ( heuristic = = null ) {
prop . put ( " content_heuristic " , 0 ) ;
} else {
if ( heuristic . redundant ) {
prop . put ( " content_heuristic " , 1 ) ;
} else {
prop . put ( " content_heuristic " , 2 ) ;
}
prop . put ( " content_heuristic_name " , heuristic . heuristicName ) ;
}
2012-11-05 03:19:28 +01:00
EventTracker . update ( EventTracker . EClass . SEARCH , new ProfilingGraph . EventSearch ( theSearch . query . id ( true ) , SearchEventType . FINALIZATION , " " + item , 0 , 0 ) , false ) ;
2015-05-26 04:15:00 +02:00
if ( result . doctype ( ) = = Response . DT_IMAGE ) {
2013-02-13 00:33:53 +01:00
final String license = URLLicense . aquireLicense ( resultURL ) ;
2010-12-01 19:48:21 +01:00
prop . put ( " content_code " , license ) ;
} else {
prop . put ( " content_code " , " " ) ;
}
2012-06-04 23:44:26 +02:00
if ( result . lat ( ) = = 0 . 0d | | result . lon ( ) = = 0 . 0d ) {
2011-03-31 01:26:36 +02:00
prop . put ( " content_loc " , 0 ) ;
} else {
prop . put ( " content_loc " , 1 ) ;
prop . put ( " content_loc_lat " , result . lat ( ) ) ;
prop . put ( " content_loc_lon " , result . lon ( ) ) ;
}
2013-06-11 14:42:30 +02:00
final boolean clustersearch = sb . isRobinsonMode ( ) & & sb . getConfig ( SwitchboardConstants . CLUSTER_MODE , " " ) . equals ( SwitchboardConstants . CLUSTER_MODE_PUBLIC_CLUSTER ) ;
2013-11-13 13:38:01 +01:00
final boolean indexReceiveGranted = sb . getConfigBool ( SwitchboardConstants . INDEX_RECEIVE_ALLOW_SEARCH , true ) | | clustersearch ;
2013-06-11 14:42:30 +02:00
boolean p2pmode = sb . peers ! = null & & sb . peers . sizeConnected ( ) > 0 & & indexReceiveGranted ;
2013-06-28 15:33:19 +02:00
boolean stealthmode = p2pmode & & theSearch . query . isLocal ( ) ;
2013-06-11 14:42:30 +02:00
if ( ( sb . getConfigBool ( SwitchboardConstants . HEURISTIC_SEARCHRESULTS , false ) | |
2013-12-04 22:44:51 +01:00
( sb . getConfigBool ( SwitchboardConstants . GREEDYLEARNING_ACTIVE , false ) & & sb . getConfigBool ( SwitchboardConstants . GREEDYLEARNING_ENABLED , false ) & & Memory . load ( ) < 1 . 0 ) ) & &
2016-02-16 02:05:58 +01:00
! stealthmode ) sb . heuristicSearchResults ( result ) ;
2012-11-05 03:19:28 +01:00
theSearch . query . transmitcount = item + 1 ;
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-06 15:26:38 +02:00
}
2011-03-08 23:37:17 +01:00
2012-11-05 03:19:28 +01:00
if ( theSearch . query . contentdom = = Classification . ContentDomain . IMAGE ) {
2007-09-06 15:26:38 +02:00
// image search; shows thumbnails
2017-02-28 18:11:54 +01:00
processImage ( sb , prop , item , theSearch , target_special_pattern , timeout , ImageViewer . hasFullViewingRights ( header , sb ) , noreferrer ) ;
2012-11-05 03:19:28 +01:00
theSearch . query . transmitcount = item + 1 ;
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-07 13:45:38 +02:00
}
2011-06-13 23:44:03 +02:00
2012-11-05 03:19:28 +01:00
if ( ( theSearch . query . contentdom = = ContentDomain . AUDIO ) | |
( theSearch . query . contentdom = = ContentDomain . VIDEO ) | |
( theSearch . query . contentdom = = ContentDomain . APP ) ) {
2007-09-07 13:45:38 +02:00
// any other media content
2008-02-21 15:53:51 +01:00
// generate result object
2015-05-26 04:15:00 +02:00
final URIMetadataNode ms = theSearch . oneResult ( item , timeout ) ;
2012-11-05 03:19:28 +01:00
prop . put ( " content " , theSearch . query . contentdom . getCode ( ) + 1 ) ; // switch on specific content
2012-04-27 14:18:02 +02:00
if ( ms = = null ) {
prop . put ( " content_item " , " 0 " ) ;
2007-09-07 13:45:38 +02:00
} else {
2012-10-10 11:46:22 +02:00
final String resultUrlstring = ms . url ( ) . toNormalform ( true ) ;
2012-04-27 14:18:02 +02:00
final String target = sb . getConfig ( resultUrlstring . matches ( target_special_pattern ) ? SwitchboardConstants . SEARCH_TARGET_SPECIAL : SwitchboardConstants . SEARCH_TARGET_DEFAULT , " _self " ) ;
prop . putHTML ( " content_item_href " , resultUrlstring ) ;
2017-02-28 18:11:54 +01:00
prop . put ( " content_item_noreferrer " , noreferrer ? 1 : 0 ) ;
2012-04-27 14:18:02 +02:00
prop . putHTML ( " content_item_hrefshort " , nxTools . shortenURLString ( resultUrlstring , MAX_URL_LENGTH ) ) ;
prop . putHTML ( " content_item_target " , target ) ;
prop . putHTML ( " content_item_name " , shorten ( ms . title ( ) , MAX_NAME_LENGTH ) ) ;
prop . put ( " content_item_col " , ( item % 2 = = 0 ) ? " 0 " : " 1 " ) ;
2012-11-05 03:19:28 +01:00
prop . put ( " content_item_nl " , ( item = = theSearch . query . offset ) ? 0 : 1 ) ;
2012-04-27 14:18:02 +02:00
prop . put ( " content_item " , 1 ) ;
2007-09-06 15:26:38 +02:00
}
2012-11-05 03:19:28 +01:00
theSearch . query . transmitcount = item + 1 ;
2007-09-09 14:30:18 +02:00
return prop ;
2007-09-06 15:26:38 +02:00
}
2011-06-13 23:44:03 +02:00
2007-08-28 14:15:46 +02:00
return prop ;
}
2016-01-08 20:42:57 +01:00
2016-02-02 09:57:54 +01:00
/ * *
* Tries to retrieve favicon url from solr result document , or generates
* default favicon URL ( i . e . " http://host/favicon.ico " ) from resultURL and
* port .
*
* @param result
* solr document result . Must not be null .
* @param preferredSize preferred icon size . If no one matches , most close icon is returned .
* @return favicon URL or null when even default favicon URL can not be generated
* @throws NullPointerException when one requested parameter is null
* /
2016-02-04 08:14:49 +01:00
protected static DigestURL getFaviconURL ( final URIMetadataNode result , Dimension preferredSize ) {
/ *
* We look preferably for a standard icon with preferred size , but
* accept as a fallback other icons below 128x128 or with no known size
* /
2016-02-05 17:05:36 +01:00
IconEntry faviconEntry = result . getFavicon ( preferredSize ) ;
2016-02-04 08:14:49 +01:00
DigestURL faviconURL ;
2016-02-05 17:05:36 +01:00
if ( faviconEntry = = null ) {
try {
2016-02-02 09:57:54 +01:00
String defaultFaviconURL = result . url ( ) . getProtocol ( ) + " :// " + result . url ( ) . getHost ( )
+ ( ( result . url ( ) . getPort ( ) ! = - 1 ) ? ( " : " + result . url ( ) . getPort ( ) ) : " " ) + " /favicon.ico " ;
2016-02-04 08:14:49 +01:00
faviconURL = new DigestURL ( defaultFaviconURL ) ;
2016-02-05 17:05:36 +01:00
} catch ( final MalformedURLException e1 ) {
ConcurrentLog . logException ( e1 ) ;
faviconURL = null ;
2016-02-02 09:57:54 +01:00
}
2016-02-05 17:05:36 +01:00
} else {
faviconURL = faviconEntry . getUrl ( ) ;
2016-02-02 09:57:54 +01:00
}
2016-02-05 17:05:36 +01:00
2016-02-02 09:57:54 +01:00
return faviconURL ;
}
2016-01-08 20:42:57 +01:00
/ * *
2016-11-28 22:10:05 +01:00
* @param hasFullViewingRights
* true when current user has full favicon viewing rights
2016-01-08 20:42:57 +01:00
* @param faviconURL
* url icon of web site
* @return url to propose in search result or empty string when faviconURL
* is null
* /
2016-11-28 22:10:05 +01:00
private static String processFaviconURL ( final boolean hasFullViewingRights , DigestURL faviconURL ) {
2016-01-08 20:42:57 +01:00
/* Only use licence code for non authentified users. For authenticated users licence would never be released and would unnecessarily fill URLLicense.permissions. */
StringBuilder contentFaviconURL = new StringBuilder ( ) ;
if ( faviconURL ! = null ) {
2016-10-22 08:23:48 +02:00
final String iconUrlExt = MultiProtocolURL . getFileExtension ( faviconURL . getFileName ( ) ) ;
/* Image format ouput for ViewFavicon servlet : default is png, except with gif and svg icons */
final String viewFaviconExt = ! iconUrlExt . isEmpty ( ) & & ImageViewer . isBrowserRendered ( iconUrlExt ) ? iconUrlExt : " png " ;
2016-02-09 20:46:44 +01:00
contentFaviconURL . append ( " ViewFavicon. " ) . append ( viewFaviconExt ) . append ( " ?maxwidth=16&maxheight=16&isStatic=true&quadratic " ) ;
2016-11-28 22:10:05 +01:00
if ( hasFullViewingRights ) {
2016-01-08 20:42:57 +01:00
contentFaviconURL . append ( " &url= " ) . append ( faviconURL . toNormalform ( true ) ) ;
} else {
contentFaviconURL . append ( " &code= " ) . append ( URLLicense . aquireLicense ( faviconURL ) ) ;
}
}
return contentFaviconURL . toString ( ) ;
}
2016-10-14 11:29:55 +02:00
2016-10-12 09:31:42 +02:00
/ * *
* Add action links reserved to authorized users . All parameters must be non null .
* @param sb the main Switchboard instance
* @param prop properties map to feed
* @param theSearch search event
* @param resultUrlstring URL of the result item
* @param resource resource scope ( " local " or " global " )
* @param origQ origin query terms
* @param urlhash URL hash of the result item
* /
private static void addAuthorizedActions ( final Switchboard sb , final serverObjects prop ,
final SearchEvent theSearch , final String resultUrlstring , final String resource , final String origQ ,
final String urlhash ) {
// check if url exists in bookmarks
boolean bookmarkexists = sb . bookmarksDB . getBookmark ( urlhash ) ! = null ;
prop . put ( " content_authorized_bookmark " , ! bookmarkexists ) ;
// bookmark icon check for YMarks
//prop.put("content_authorized_bookmark", sb.tables.bookmarks.hasBookmark("admin", urlhash) ? "0" : "1");
/* Bookmark, delete and recommend action links share the same URL prefix */
StringBuilder linkBuilder = new StringBuilder ( ) ;
String actionLinkPrefix = linkBuilder . append ( " yacysearch.html?query= " ) . append ( origQ . replace ( ' ' , '+' ) )
. append ( " &Enter=Search&count= " ) . append ( theSearch . query . itemsPerPage ( ) ) . append ( " &offset= " )
. append ( ( theSearch . query . neededResults ( ) - theSearch . query . itemsPerPage ( ) ) ) . append ( " &resource= " )
. append ( resource ) . append ( " &time=3 " ) . toString ( ) ;
linkBuilder . setLength ( 0 ) ;
String encodedURLString ;
try {
encodedURLString = URLEncoder . encode ( crypt . simpleEncode ( resultUrlstring ) , StandardCharsets . UTF_8 . name ( ) ) ;
} catch ( UnsupportedEncodingException e1 ) {
ConcurrentLog . warn ( " YACY_SEARCH_ITEM " , " UTF-8 encoding is not supported! " ) ;
encodedURLString = crypt . simpleEncode ( resultUrlstring ) ;
}
String bookmarkLink = linkBuilder . append ( actionLinkPrefix ) . append ( " &bookmarkref= " ) . append ( urlhash )
. append ( " &bookmarkurl= " ) . append ( encodedURLString ) . append ( " &urlmaskfilter=.* " )
. toString ( ) ;
linkBuilder . setLength ( 0 ) ;
/* Delete and recommend action links share the same URL suffix */
String encodedRanking ;
try {
encodedRanking = URLEncoder . encode ( crypt . simpleEncode ( theSearch . query . ranking . toExternalString ( ) ) , StandardCharsets . UTF_8 . name ( ) ) ;
} catch ( UnsupportedEncodingException e1 ) {
ConcurrentLog . warn ( " YACY_SEARCH_ITEM " , " UTF-8 encoding is not supported! " ) ;
encodedRanking = crypt . simpleEncode ( resultUrlstring ) ;
}
String actionLinkSuffix = linkBuilder . append ( urlhash )
. append ( " &urlmaskfilter=.* " ) . append ( " &order= " ) . append ( encodedRanking ) . toString ( ) ;
linkBuilder . setLength ( 0 ) ;
String deleteLink = linkBuilder . append ( actionLinkPrefix ) . append ( " &deleteref= " ) . append ( actionLinkSuffix ) . toString ( ) ;
linkBuilder . setLength ( 0 ) ;
String recommendLink = linkBuilder . append ( actionLinkPrefix ) . append ( " &recommendref= " ) . append ( actionLinkSuffix ) . toString ( ) ;
linkBuilder . setLength ( 0 ) ;
prop . put ( " content_authorized_bookmark_bookmarklink " , bookmarkLink ) ;
prop . put ( " content_authorized_recommend_deletelink " , deleteLink ) ;
prop . put ( " content_authorized_recommend_recommendlink " , recommendLink ) ;
prop . put ( " content_authorized_recommend " , ( sb . peers . newsPool . getSpecific ( NewsPool . OUTGOING_DB , NewsPool . CATEGORY_SURFTIPP_ADD , " url " , resultUrlstring ) = = null ) ? " 1 " : " 0 " ) ;
prop . put ( " content_authorized_urlhash " , urlhash ) ;
}
2015-10-21 02:49:51 +02:00
/ * *
* Process search of image type and feed prop object . All parameters must not be null .
* @param sb Switchboard instance
* @param prop result
* @param item item index .
* @param theSearch search event
* @param target_special_pattern
* @param timeout result getting timeOut
2016-11-28 22:10:05 +01:00
* @param fullViewingRights set to true when current user has full image viewing rights
2017-02-28 18:11:54 +01:00
* @param noreferrer set to true when the noreferrer link type should be added to the original image source links
2015-10-21 02:49:51 +02:00
* /
private static void processImage ( final Switchboard sb , final serverObjects prop , final int item ,
2017-02-28 18:11:54 +01:00
final SearchEvent theSearch , final String target_special_pattern , long timeout , boolean fullViewingRights , final boolean noreferrer ) {
2015-10-21 02:49:51 +02:00
prop . put ( " content " , theSearch . query . contentdom . getCode ( ) + 1 ) ; // switch on specific content
try {
SearchEvent . ImageResult image = theSearch . oneImageResult ( item , timeout ) ;
final String imageUrlstring = image . imageUrl . toNormalform ( true ) ;
final String imageUrlExt = MultiProtocolURL . getFileExtension ( image . imageUrl . getFileName ( ) ) ;
final String target = sb . getConfig ( imageUrlstring . matches ( target_special_pattern ) ? SwitchboardConstants . SEARCH_TARGET_SPECIAL : SwitchboardConstants . SEARCH_TARGET_DEFAULT , " _self " ) ;
final String license = URLLicense . aquireLicense ( image . imageUrl ) ; // this is just the license key to get the image forwarded through the YaCy thumbnail viewer, not an actual lawful license
/* Image format ouput for ViewImage servlet : default is png, except with gif and svg images */
2016-02-09 20:46:44 +01:00
final String viewImageExt = ! imageUrlExt . isEmpty ( ) & & ImageViewer . isBrowserRendered ( imageUrlExt ) ? imageUrlExt : " png " ;
2015-10-21 02:49:51 +02:00
/* Thumb URL */
2016-01-08 20:42:57 +01:00
StringBuilder thumbURLBuilder = new StringBuilder ( " ViewImage. " ) . append ( viewImageExt ) . append ( " ?maxwidth= " )
. append ( DEFAULT_IMG_WIDTH ) . append ( " &maxheight= " ) . append ( DEFAULT_IMG_HEIGHT )
. append ( " &isStatic=true&quadratic " ) ;
/* Only use licence code for non authentified users. For authenticated users licence would never be released and would unnecessarily fill URLLicense.permissions. */
2016-11-28 22:10:05 +01:00
if ( fullViewingRights ) {
2016-01-08 20:42:57 +01:00
thumbURLBuilder . append ( " &url= " ) . append ( imageUrlstring ) ;
} else {
thumbURLBuilder . append ( " &code= " ) . append ( URLLicense . aquireLicense ( image . imageUrl ) ) ;
}
String thumbURL = thumbURLBuilder . toString ( ) ;
prop . putHTML ( " content_item_hrefCache " , thumbURL ) ;
2015-10-21 02:49:51 +02:00
/* Full size preview URL */
2016-11-28 22:10:05 +01:00
if ( fullViewingRights ) {
2016-01-08 20:42:57 +01:00
prop . putHTML ( " content_item_hrefFullPreview " , " ViewImage. " + viewImageExt + " ?isStatic=true&url= " + imageUrlstring ) ;
} else {
/* Not authenticated : full preview URL must be the same as thumb URL */
prop . putHTML ( " content_item_hrefFullPreview " , thumbURL ) ;
}
2015-10-21 02:49:51 +02:00
prop . putHTML ( " content_item_href " , imageUrlstring ) ;
prop . putHTML ( " content_item_target " , target ) ;
prop . put ( " content_item_code " , license ) ;
prop . putHTML ( " content_item_name " , shorten ( image . imagetext , MAX_NAME_LENGTH ) ) ;
prop . put ( " content_item_mimetype " , image . mimetype ) ;
prop . put ( " content_item_fileSize " , 0 ) ;
String itemWidth = DEFAULT_IMG_WIDTH + " px " , itemHeight = DEFAULT_IMG_HEIGHT + " px " , itemStyle = " " ;
/ * When image content is rendered by browser :
* - set smaller dimension to 100 % in order to crop image on other dimension with CSS style ' overflow : hidden ' on image container
* - set negative margin top behave like ViewImage which sets an offset when cutting to square * /
2016-02-09 20:46:44 +01:00
if ( ImageViewer . isBrowserRendered ( imageUrlExt ) ) {
2015-10-21 02:49:51 +02:00
if ( image . width > image . height ) {
/* Landscape orientation */
itemWidth = " " ;
itemHeight = " 100% " ;
if ( image . height > 0 ) {
double scale = ( ( double ) DEFAULT_IMG_HEIGHT ) / ( ( double ) image . height ) ;
int margin = ( int ) ( ( image . height - image . width ) * ( scale / 2 . 0 ) ) ;
itemStyle = " margin-left: " + margin + " px; " ;
}
} else {
/* Portrait orientation, or square or unknown dimensions (both equals zero) */
itemWidth = " 100% " ;
itemHeight = " " ;
if ( image . height > image . width & & image . width > 0 ) {
double scale = ( ( double ) DEFAULT_IMG_WIDTH ) / ( ( double ) image . width ) ;
int margin = ( int ) ( ( image . width - image . height ) * ( scale / 2 . 0 ) ) ;
itemStyle = " margin-top: " + margin + " px; " ;
}
}
}
prop . put ( " content_item_width " , itemWidth ) ;
prop . put ( " content_item_height " , itemHeight ) ;
prop . put ( " content_item_style " , itemStyle ) ;
prop . put ( " content_item_attr " , " " /*(ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"*/ ) ; // attributes, here: original size of image
prop . put ( " content_item_urlhash " , ASCII . String ( image . imageUrl . hash ( ) ) ) ;
prop . put ( " content_item_source " , image . sourceUrl . toNormalform ( true ) ) ;
2017-02-28 18:11:54 +01:00
prop . put ( " content_item_noreferrer " , noreferrer ? 1 : 0 ) ;
2015-10-21 02:49:51 +02:00
prop . putXML ( " content_item_source-xml " , image . sourceUrl . toNormalform ( true ) ) ;
prop . put ( " content_item_sourcedom " , image . sourceUrl . getHost ( ) ) ;
prop . put ( " content_item_nl " , ( item = = theSearch . query . offset ) ? 0 : 1 ) ;
prop . put ( " content_item " , 1 ) ;
} catch ( MalformedURLException e ) {
prop . put ( " content_item " , " 0 " ) ;
}
}
2011-06-13 23:44:03 +02:00
2008-08-02 14:12:04 +02:00
private static String shorten ( final String s , final int length ) {
2011-03-08 23:37:17 +01:00
final String ret ;
if ( s . length ( ) < = length ) {
ret = s ;
} else {
final int p = s . lastIndexOf ( '.' ) ;
if ( p < 0 ) {
ret = s . substring ( 0 , length - SHORTEN_SUFFIX_LENGTH ) + SHORTEN_SUFFIX ;
} else {
assert p > = 0 ;
final String ext = s . substring ( p + 1 ) ;
if ( ext . length ( ) > 4 ) {
ret = s . substring ( 0 , length / 2 - 2 ) + SHORTEN_SUFFIX + s . substring ( s . length ( ) - ( length / 2 - 2 ) ) ;
} else {
ret = s . substring ( 0 , length - ext . length ( ) - SHORTEN_SUFFIX_LENGTH ) + SHORTEN_SUFFIX + ext ;
}
}
}
return ret ;
2007-09-07 13:45:38 +02:00
}
2007-08-28 14:15:46 +02:00
}