2006-10-14 16:13:12 +02:00
// Surftips.java
2006-09-30 00:27:20 +02:00
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.IOException ;
import java.util.Date ;
import java.util.HashMap ;
import java.util.Iterator ;
import de.anomic.http.httpHeader ;
import de.anomic.index.indexURL ;
import de.anomic.kelondro.kelondroMScoreCluster ;
import de.anomic.kelondro.kelondroRow ;
import de.anomic.plasma.plasmaSwitchboard ;
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
import de.anomic.tools.crypt ;
import de.anomic.tools.nxTools ;
import de.anomic.yacy.yacyCore ;
import de.anomic.yacy.yacyNewsPool ;
import de.anomic.yacy.yacyNewsRecord ;
import de.anomic.yacy.yacySeed ;
2006-10-14 16:13:12 +02:00
public class Surftips {
2006-09-30 00:27:20 +02:00
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
final plasmaSwitchboard sb = ( plasmaSwitchboard ) env ;
final serverObjects prop = new serverObjects ( ) ;
boolean authenticated = sb . adminAuthenticated ( header ) > = 2 ;
int display = ( ( post = = null ) | | ( ! authenticated ) ) ? 0 : post . getInt ( " display " , 0 ) ;
prop . put ( " display " , display ) ;
2006-10-01 01:28:03 +02:00
boolean showScore = ( ( post ! = null ) & & ( post . containsKey ( " score " ) ) ) ;
2006-10-14 16:13:12 +02:00
boolean surftipsOn = sb . getConfigBool ( " showSurftips " , true ) ;
if ( ( post ! = null ) & & ( post . containsKey ( " surftips " ) ) ) {
2006-09-30 00:27:20 +02:00
if ( ! sb . verifyAuthentication ( header , false ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
2006-10-14 16:13:12 +02:00
surftipsOn = post . get ( " surftips " , " 0 " ) . equals ( " 1 " ) ;
sb . setConfig ( " showSurftips " , surftipsOn ) ;
2006-09-30 00:27:20 +02:00
}
2006-10-14 16:13:12 +02:00
if ( surftipsOn ) {
2006-09-30 00:27:20 +02:00
// read voting
String hash ;
if ( ( post ! = null ) & & ( ( hash = post . get ( " voteNegative " , null ) ) ! = null ) ) {
if ( ! sb . verifyAuthentication ( header , false ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
// make new news message with voting
HashMap map = new HashMap ( ) ;
map . put ( " urlhash " , hash ) ;
map . put ( " vote " , " negative " ) ;
map . put ( " refid " , post . get ( " refid " , " " ) ) ;
yacyCore . newsPool . publishMyNews ( new yacyNewsRecord ( " stippavt " , map ) ) ;
}
if ( ( post ! = null ) & & ( ( hash = post . get ( " votePositive " , null ) ) ! = null ) ) {
if ( ! sb . verifyAuthentication ( header , false ) ) {
prop . put ( " AUTHENTICATE " , " admin log-in " ) ; // force log-in
return prop ;
}
// make new news message with voting
HashMap map = new HashMap ( ) ;
map . put ( " urlhash " , hash ) ;
map . put ( " url " , crypt . simpleDecode ( post . get ( " url " , " " ) , null ) ) ;
map . put ( " title " , crypt . simpleDecode ( post . get ( " title " , " " ) , null ) ) ;
map . put ( " description " , crypt . simpleDecode ( post . get ( " description " , " " ) , null ) ) ;
map . put ( " vote " , " positive " ) ;
map . put ( " refid " , post . get ( " refid " , " " ) ) ;
yacyCore . newsPool . publishMyNews ( new yacyNewsRecord ( " stippavt " , map ) ) ;
}
2006-10-14 16:13:12 +02:00
// create surftips
2006-09-30 00:27:20 +02:00
HashMap negativeHashes = new HashMap ( ) ; // a mapping from an url hash to Integer (count of votes)
HashMap positiveHashes = new HashMap ( ) ; // a mapping from an url hash to Integer (count of votes)
accumulateVotes ( negativeHashes , positiveHashes , yacyNewsPool . INCOMING_DB ) ;
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
kelondroMScoreCluster ranking = new kelondroMScoreCluster ( ) ; // score cluster for url hashes
kelondroRow rowdef = new kelondroRow ( " String url-255, String title-120, String description-120, String refid- " + ( yacyCore . universalDateShortPattern . length ( ) + 12 ) ) ;
2006-10-14 16:13:12 +02:00
HashMap surftips = new HashMap ( ) ; // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateSurftips ( surftips , ranking , rowdef , negativeHashes , positiveHashes , yacyNewsPool . INCOMING_DB ) ;
//accumulateSurftips(surftips, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateSurftips(surftips, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
2006-09-30 00:27:20 +02:00
// read out surftipp array and create property entries
Iterator k = ranking . scores ( false ) ;
int i = 0 ;
kelondroRow . Entry row ;
String url , urlhash , refid , title , description ;
boolean voted ;
while ( k . hasNext ( ) ) {
urlhash = ( String ) k . next ( ) ;
if ( urlhash = = null ) continue ;
2006-10-14 16:13:12 +02:00
row = ( kelondroRow . Entry ) surftips . get ( urlhash ) ;
2006-09-30 00:27:20 +02:00
if ( row = = null ) continue ;
url = row . getColString ( 0 , null ) ;
title = row . getColString ( 1 , " UTF-8 " ) ;
description = row . getColString ( 2 , " UTF-8 " ) ;
if ( ( url = = null ) | | ( title = = null ) | | ( description = = null ) ) continue ;
refid = row . getColString ( 3 , null ) ;
voted = false ;
try {
voted = ( yacyCore . newsPool . getSpecific ( yacyNewsPool . OUTGOING_DB , " stippavt " , " refid " , refid ) ! = null ) | | ( yacyCore . newsPool . getSpecific ( yacyNewsPool . PUBLISHED_DB , " stippavt " , " refid " , refid ) ! = null ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
}
2006-10-14 16:13:12 +02:00
prop . put ( " surftips_results_ " + i + " _recommend " , ( voted ) ? 0 : 1 ) ;
prop . put ( " surftips_results_ " + i + " _recommend_negativeVoteLink " , " /Surftips.html?voteNegative= " + urlhash + " &refid= " + refid + " &display= " + display + ( ( showScore ) ? " &score= " : " " ) ) ; // for negaive votes, we don't send around the bad url again, the hash is enough
prop . put ( " surftips_results_ " + i + " _recommend_positiveVoteLink " , " /Surftips.html?votePositive= " + urlhash + " &refid= " + refid + " &url= " + crypt . simpleEncode ( url , null , 'b' ) + " &title= " + crypt . simpleEncode ( title , null , 'b' ) + " &description= " + crypt . simpleEncode ( description , null , 'b' ) + " &display= " + display + ( ( showScore ) ? " &score= " : " " ) ) ;
prop . put ( " surftips_results_ " + i + " _url " , url ) ;
prop . put ( " surftips_results_ " + i + " _urlname " , nxTools . shortenURLString ( url , 60 ) ) ;
prop . put ( " surftips_results_ " + i + " _urlhash " , urlhash ) ;
prop . put ( " surftips_results_ " + i + " _title " , ( showScore ) ? ( " ( " + ranking . getScore ( urlhash ) + " ) " + title ) : title ) ;
prop . put ( " surftips_results_ " + i + " _description " , description ) ;
2006-09-30 00:27:20 +02:00
i + + ;
if ( i > = 50 ) break ;
}
2006-10-14 16:13:12 +02:00
prop . put ( " surftips_results " , i ) ;
prop . put ( " surftips " , 1 ) ;
2006-09-30 00:27:20 +02:00
} else {
2006-10-14 16:13:12 +02:00
prop . put ( " surftips " , 0 ) ;
2006-09-30 00:27:20 +02:00
}
return prop ;
}
/**
 * Computes an age-dependent score bonus for a news record.
 *
 * The bonus is 10 minus the age in whole days, limited to the range 0..10:
 * a record created less than one day ago gets 10, a record that is ten or
 * more days old gets 0.
 *
 * @param created creation date of the record; may be null
 * @return a bonus between 0 and 10 (inclusive); 0 if created is null
 */
private static int timeFactor(Date created) {
    if (created == null) return 0; // no date, no freshness bonus

    final long millisPerDay = 24L * 60L * 60L * 1000L;
    final long ageInDays = (System.currentTimeMillis() - created.getTime()) / millisPerDay;

    // A date in the future yields a negative age; without the upper bound
    // such a record would receive an arbitrarily large bonus.
    return (int) Math.max(0L, Math.min(10L, 10L - ageInDays));
}
private static void accumulateVotes ( HashMap negativeHashes , HashMap positiveHashes , int dbtype ) {
int maxCount = Math . min ( 1000 , yacyCore . newsPool . size ( dbtype ) ) ;
yacyNewsRecord record ;
for ( int j = 0 ; j < maxCount ; j + + ) try {
record = yacyCore . newsPool . get ( dbtype , j ) ;
if ( record = = null ) continue ;
if ( record . category ( ) . equals ( " stippavt " ) ) {
String urlhash = record . attribute ( " urlhash " , " " ) ;
String vote = record . attribute ( " vote " , " " ) ;
int factor = ( ( dbtype = = yacyNewsPool . OUTGOING_DB ) | | ( dbtype = = yacyNewsPool . PUBLISHED_DB ) ) ? 2 : 1 ;
if ( vote . equals ( " negative " ) ) {
Integer i = ( Integer ) negativeHashes . get ( urlhash ) ;
if ( i = = null ) negativeHashes . put ( urlhash , new Integer ( factor ) ) ;
else negativeHashes . put ( urlhash , new Integer ( i . intValue ( ) + factor ) ) ;
}
if ( vote . equals ( " positive " ) ) {
Integer i = ( Integer ) positiveHashes . get ( urlhash ) ;
if ( i = = null ) positiveHashes . put ( urlhash , new Integer ( factor ) ) ;
else positiveHashes . put ( urlhash , new Integer ( i . intValue ( ) + factor ) ) ;
}
}
} catch ( IOException e ) { e . printStackTrace ( ) ; }
}
2006-10-14 16:13:12 +02:00
private static void accumulateSurftips (
HashMap surftips , kelondroMScoreCluster ranking , kelondroRow rowdef ,
2006-09-30 00:27:20 +02:00
HashMap negativeHashes , HashMap positiveHashes , int dbtype ) {
int maxCount = Math . min ( 1000 , yacyCore . newsPool . size ( dbtype ) ) ;
yacyNewsRecord record ;
String url = " " , urlhash ;
kelondroRow . Entry entry ;
int score = 0 ;
Integer vote ;
for ( int j = 0 ; j < maxCount ; j + + ) try {
record = yacyCore . newsPool . get ( dbtype , j ) ;
if ( record = = null ) continue ;
entry = null ;
if ( record . category ( ) . equals ( " crwlstrt " ) ) {
String intention = record . attribute ( " intention " , " " ) ;
url = record . attribute ( " startURL " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( ( intention . length ( ) = = 0 ) ? record . attribute ( " startURL " , " " ) : intention ) . getBytes ( ) ,
( " Crawl Start Point " ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 2 + Math . min ( 10 , intention . length ( ) / 4 ) + timeFactor ( record . created ( ) ) ;
}
if ( record . category ( ) . equals ( " prfleupd " ) ) {
url = record . attribute ( " homepage " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( " Home Page of " + record . attribute ( " nickname " , " " ) ) . getBytes ( " UTF-8 " ) ,
( " Profile Update " ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 1 + timeFactor ( record . created ( ) ) ;
}
if ( record . category ( ) . equals ( " bkmrkadd " ) ) {
url = record . attribute ( " url " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( record . attribute ( " title " , " " ) ) . getBytes ( " UTF-8 " ) ,
( " Bookmark: " + record . attribute ( " description " , " " ) ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 8 + timeFactor ( record . created ( ) ) ;
}
if ( record . category ( ) . equals ( " stippadd " ) ) {
url = record . attribute ( " url " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( record . attribute ( " title " , " " ) ) . getBytes ( " UTF-8 " ) ,
( " Surf Tipp: " + record . attribute ( " description " , " " ) ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 5 + timeFactor ( record . created ( ) ) ;
}
if ( record . category ( ) . equals ( " stippavt " ) ) {
if ( ! ( record . attribute ( " vote " , " negative " ) . equals ( " positive " ) ) ) continue ;
url = record . attribute ( " url " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
record . attribute ( " title " , " " ) . getBytes ( " UTF-8 " ) ,
record . attribute ( " description " , " " ) . getBytes ( " UTF-8 " ) ,
record . attribute ( " refid " , " " ) . getBytes ( )
} ) ;
score = 5 + timeFactor ( record . created ( ) ) ;
}
if ( record . category ( ) . equals ( " wiki_upd " ) ) {
yacySeed seed = yacyCore . seedDB . getConnected ( record . originator ( ) ) ;
if ( seed = = null ) seed = yacyCore . seedDB . getDisconnected ( record . originator ( ) ) ;
if ( seed ! = null ) {
url = " http:// " + seed . getAddress ( ) + " /Wiki.html?page= " + record . attribute ( " page " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( record . attribute ( " author " , " Anonymous " ) + " : " + record . attribute ( " page " , " " ) ) . getBytes ( " UTF-8 " ) ,
( " Wiki Update: " + record . attribute ( " description " , " " ) ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 4 + timeFactor ( record . created ( ) ) ;
}
}
if ( record . category ( ) . equals ( " blog_add " ) ) {
yacySeed seed = yacyCore . seedDB . getConnected ( record . originator ( ) ) ;
if ( seed = = null ) seed = yacyCore . seedDB . getDisconnected ( record . originator ( ) ) ;
if ( seed ! = null ) {
url = " http:// " + seed . getAddress ( ) + " /Blog.html?page= " + record . attribute ( " page " , " " ) ;
entry = rowdef . newEntry ( new byte [ ] [ ] {
url . getBytes ( ) ,
( record . attribute ( " author " , " Anonymous " ) + " : " + record . attribute ( " page " , " " ) ) . getBytes ( " UTF-8 " ) ,
( " Blog Entry: " + record . attribute ( " subject " , " " ) ) . getBytes ( " UTF-8 " ) ,
record . id ( ) . getBytes ( )
} ) ;
score = 4 + timeFactor ( record . created ( ) ) ;
}
}
// add/subtract votes and write record
if ( entry ! = null ) {
urlhash = indexURL . urlHash ( url ) ;
if ( urlhash = = null ) {
2006-10-14 16:13:12 +02:00
System . out . println ( " Surftips: bad url ' " + url + " ' from news record " + record . toString ( ) ) ;
2006-09-30 00:27:20 +02:00
continue ;
}
if ( ( vote = ( Integer ) negativeHashes . get ( urlhash ) ) ! = null ) {
score = Math . max ( 0 , score - vote . intValue ( ) ) ; // do not go below zero
}
if ( ( vote = ( Integer ) positiveHashes . get ( urlhash ) ) ! = null ) {
score + = 2 * vote . intValue ( ) ;
}
// consider double-entries
2006-10-14 16:13:12 +02:00
if ( surftips . containsKey ( urlhash ) ) {
2006-09-30 00:27:20 +02:00
ranking . addScore ( urlhash , score ) ;
} else {
ranking . setScore ( urlhash , score ) ;
2006-10-14 16:13:12 +02:00
surftips . put ( urlhash , entry ) ;
2006-09-30 00:27:20 +02:00
}
}
} catch ( IOException e ) { e . printStackTrace ( ) ; }
}
}