2005-10-05 16:11:50 +02:00
// IndexControl_p.java
2005-04-07 21:19:42 +02:00
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
2005-10-05 16:11:50 +02:00
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2005-04-07 21:19:42 +02:00
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
2005-05-12 19:50:45 +02:00
// You must compile this file with
2005-10-05 16:11:50 +02:00
// javac -classpath .:../classes IndexControl_p.java
2005-04-07 21:19:42 +02:00
// if the shell's current path is HTROOT
2005-05-05 07:36:42 +02:00
import java.io.IOException ;
2005-12-15 11:31:00 +01:00
import java.net.MalformedURLException ;
2005-05-05 07:36:42 +02:00
import java.util.Enumeration ;
2005-08-13 00:14:24 +02:00
import java.util.HashMap ;
2006-09-30 00:27:20 +02:00
import java.util.HashSet ;
2005-05-05 07:36:42 +02:00
import java.util.Iterator ;
2006-08-01 12:30:55 +02:00
import java.util.Set ;
2005-10-05 16:11:50 +02:00
import java.util.TreeMap ;
2005-10-12 14:28:49 +02:00
2005-05-05 07:36:42 +02:00
import de.anomic.http.httpHeader ;
2006-05-28 03:09:31 +02:00
import de.anomic.index.indexContainer ;
2006-11-08 17:17:47 +01:00
import de.anomic.index.indexRWIEntry ;
2006-11-10 02:13:33 +01:00
import de.anomic.plasma.plasmaURL ;
2006-11-08 17:17:47 +01:00
import de.anomic.index.indexURLEntry ;
2006-09-30 00:27:20 +02:00
import de.anomic.net.URL ;
2006-11-23 03:16:30 +01:00
import de.anomic.plasma.plasmaCondenser ;
2005-05-05 07:36:42 +02:00
import de.anomic.plasma.plasmaSwitchboard ;
2006-08-12 16:28:14 +02:00
import de.anomic.plasma.urlPattern.plasmaURLPattern ;
2006-10-10 22:09:26 +02:00
import de.anomic.server.serverCodings ;
2005-05-05 07:36:42 +02:00
import de.anomic.server.serverObjects ;
import de.anomic.server.serverSwitch ;
import de.anomic.yacy.yacyClient ;
import de.anomic.yacy.yacyCore ;
import de.anomic.yacy.yacySeed ;
2005-04-07 21:19:42 +02:00
public class IndexControl_p {
2006-12-20 00:55:52 +01:00
public static serverObjects respond ( httpHeader header , serverObjects post , serverSwitch env ) {
2005-10-05 16:11:50 +02:00
// return variable that accumulates replacements
2005-04-07 21:19:42 +02:00
plasmaSwitchboard switchboard = ( plasmaSwitchboard ) env ;
2006-09-21 22:36:46 +02:00
2005-10-05 16:11:50 +02:00
serverObjects prop = new serverObjects ( ) ;
2005-04-07 21:19:42 +02:00
2005-10-05 16:11:50 +02:00
if ( post = = null | | env = = null ) {
2005-04-07 21:19:42 +02:00
prop . put ( " keystring " , " " ) ;
prop . put ( " keyhash " , " " ) ;
prop . put ( " urlstring " , " " ) ;
prop . put ( " urlhash " , " " ) ;
prop . put ( " result " , " " ) ;
2005-08-03 15:43:55 +02:00
prop . put ( " wcount " , Integer . toString ( switchboard . wordIndex . size ( ) ) ) ;
2006-12-05 03:47:51 +01:00
prop . put ( " ucount " , Integer . toString ( switchboard . wordIndex . loadedURL . size ( ) ) ) ;
2005-04-07 21:19:42 +02:00
prop . put ( " otherHosts " , " " ) ;
2006-12-20 00:55:52 +01:00
prop . put ( " indexDistributeChecked " , ( switchboard . getConfig ( " allowDistributeIndex " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexDistributeWhileCrawling " , ( switchboard . getConfig ( " allowDistributeIndexWhileCrawling " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexReceiveChecked " , ( switchboard . getConfig ( " allowReceiveIndex " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexReceiveBlockBlacklistChecked " , ( switchboard . getConfig ( " indexReceiveBlockBlacklist " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
2006-10-10 22:09:26 +02:00
prop . put ( " peertags " , serverCodings . set2string ( yacyCore . seedDB . mySeed . getPeerTags ( ) , " , " , false ) ) ;
2005-04-07 21:19:42 +02:00
return prop ; // be save
}
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
// default values
2005-12-15 11:31:00 +01:00
String keystring = ( ( String ) post . get ( " keystring " , " " ) ) . trim ( ) ;
String keyhash = ( ( String ) post . get ( " keyhash " , " " ) ) . trim ( ) ;
String urlstring = ( ( String ) post . get ( " urlstring " , " " ) ) . trim ( ) ;
String urlhash = ( ( String ) post . get ( " urlhash " , " " ) ) . trim ( ) ;
2005-10-05 16:11:50 +02:00
if ( ! urlstring . startsWith ( " http:// " ) & &
! urlstring . startsWith ( " https:// " ) ) { urlstring = " http:// " + urlstring ; }
2005-04-07 21:19:42 +02:00
prop . put ( " keystring " , keystring ) ;
prop . put ( " keyhash " , keyhash ) ;
prop . put ( " urlstring " , urlstring ) ;
prop . put ( " urlhash " , urlhash ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " result " , " " ) ;
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
// read values from checkboxes
String [ ] urlx = post . getAll ( " urlhx.* " ) ;
boolean delurl = post . containsKey ( " delurl " ) ;
boolean delurlref = post . containsKey ( " delurlref " ) ;
2005-10-05 16:11:50 +02:00
// System.out.println("DEBUG CHECK: " + ((delurl) ? "delurl" : "") + " " + ((delurlref) ? "delurlref" : ""));
// DHT control
2005-07-28 00:52:29 +02:00
if ( post . containsKey ( " setIndexTransmission " ) ) {
2005-10-05 16:11:50 +02:00
if ( post . get ( " indexDistribute " , " " ) . equals ( " on " ) ) {
switchboard . setConfig ( " allowDistributeIndex " , " true " ) ;
} else {
switchboard . setConfig ( " allowDistributeIndex " , " false " ) ;
}
2006-09-20 06:44:56 +02:00
if ( post . get ( " indexDistributeWhileCrawling " , " " ) . equals ( " on " ) ) {
2005-10-05 16:11:50 +02:00
switchboard . setConfig ( " allowDistributeIndexWhileCrawling " , " true " ) ;
} else {
switchboard . setConfig ( " allowDistributeIndexWhileCrawling " , " false " ) ;
}
if ( post . get ( " indexReceive " , " " ) . equals ( " on " ) ) {
switchboard . setConfig ( " allowReceiveIndex " , " true " ) ;
yacyCore . seedDB . mySeed . setFlagAcceptRemoteIndex ( true ) ;
} else {
switchboard . setConfig ( " allowReceiveIndex " , " false " ) ;
yacyCore . seedDB . mySeed . setFlagAcceptRemoteIndex ( false ) ;
}
if ( post . get ( " indexReceiveBlockBlacklist " , " " ) . equals ( " on " ) ) {
switchboard . setConfig ( " indexReceiveBlockBlacklist " , " true " ) ;
} else {
switchboard . setConfig ( " indexReceiveBlockBlacklist " , " false " ) ;
}
2006-10-10 22:09:26 +02:00
if ( post . containsKey ( " peertags " ) ) {
yacyCore . seedDB . mySeed . setPeerTags ( serverCodings . string2set ( ( String ) post . get ( " peertags " ) , " , " ) ) ;
}
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
// delete word
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " keyhashdeleteall " ) ) {
2005-10-05 16:11:50 +02:00
if ( delurl | | delurlref ) {
2005-05-07 23:11:18 +02:00
// generate an urlx array
2006-05-28 03:09:31 +02:00
indexContainer index = null ;
2006-12-06 13:51:46 +01:00
index = switchboard . wordIndex . getContainer ( keyhash , null , - 1 ) ;
2006-01-30 13:42:06 +01:00
Iterator en = index . entries ( ) ;
int i = 0 ;
urlx = new String [ index . size ( ) ] ;
while ( en . hasNext ( ) ) {
2006-11-08 17:17:47 +01:00
urlx [ i + + ] = ( ( indexRWIEntry ) en . next ( ) ) . urlHash ( ) ;
2005-04-07 21:19:42 +02:00
}
2006-01-30 13:42:06 +01:00
index = null ;
2005-05-07 23:11:18 +02:00
}
2005-10-05 16:11:50 +02:00
if ( delurlref ) {
2005-12-15 11:31:00 +01:00
for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
2005-10-05 16:11:50 +02:00
}
if ( delurl | | delurlref ) {
for ( int i = 0 ; i < urlx . length ; i + + ) {
2006-12-05 03:47:51 +01:00
switchboard . wordIndex . loadedURL . remove ( urlx [ i ] ) ;
2005-10-05 16:11:50 +02:00
}
}
2006-05-26 11:32:50 +02:00
switchboard . wordIndex . deleteContainer ( keyhash ) ;
2005-04-07 21:19:42 +02:00
post . remove ( " keyhashdeleteall " ) ;
2005-10-05 16:11:50 +02:00
if ( keystring . length ( ) > 0 & &
2006-11-23 03:16:30 +01:00
plasmaCondenser . word2hash ( keystring ) . equals ( keyhash ) ) {
2005-04-07 21:19:42 +02:00
post . put ( " keystringsearch " , " generated " ) ;
2005-10-05 16:11:50 +02:00
} else {
2005-04-07 21:19:42 +02:00
post . put ( " keyhashsearch " , " generated " ) ;
2005-10-05 16:11:50 +02:00
}
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
// delete selected URLs
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " keyhashdelete " ) ) {
2005-10-05 16:11:50 +02:00
if ( delurlref ) {
2005-12-15 11:31:00 +01:00
for ( int i = 0 ; i < urlx . length ; i + + ) switchboard . removeAllUrlReferences ( urlx [ i ] , true ) ;
2005-10-05 16:11:50 +02:00
}
if ( delurl | | delurlref ) {
for ( int i = 0 ; i < urlx . length ; i + + ) {
2006-12-05 03:47:51 +01:00
switchboard . wordIndex . loadedURL . remove ( urlx [ i ] ) ;
2005-10-05 16:11:50 +02:00
}
}
2006-08-01 12:30:55 +02:00
Set urlHashes = new HashSet ( ) ;
for ( int i = 0 ; i < urlx . length ; i + + ) urlHashes . add ( urlx [ i ] ) ;
2006-12-06 13:51:46 +01:00
switchboard . wordIndex . removeEntries ( keyhash , urlHashes ) ;
2005-04-07 21:19:42 +02:00
// this shall lead to a presentation of the list; so handle that the remaining program
// thinks that it was called for a list presentation
post . remove ( " keyhashdelete " ) ;
2006-11-23 03:16:30 +01:00
if ( keystring . length ( ) > 0 & & plasmaCondenser . word2hash ( keystring ) . equals ( keyhash ) ) {
2005-04-07 21:19:42 +02:00
post . put ( " keystringsearch " , " generated " ) ;
2005-10-05 16:11:50 +02:00
} else {
2005-04-07 21:19:42 +02:00
post . put ( " keyhashsearch " , " generated " ) ;
2005-10-05 16:11:50 +02:00
// prop.put("result", "Delete of relation of url hashes " + result + " to key hash " + keyhash);
}
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " urlhashdeleteall " ) ) {
2005-12-15 11:31:00 +01:00
//try {
2005-12-11 01:25:02 +01:00
int i = switchboard . removeAllUrlReferences ( urlhash , true ) ;
prop . put ( " result " , " Deleted URL and " + i + " references from " + i + " word indexes. " ) ;
2005-12-15 11:31:00 +01:00
//} catch (IOException e) {
// prop.put("result", "Deleted nothing because the url-hash could not be resolved");
//}
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " urlhashdelete " ) ) {
2006-12-05 03:47:51 +01:00
indexURLEntry entry = switchboard . wordIndex . loadedURL . load ( urlhash , null ) ;
2006-09-07 20:24:39 +02:00
if ( entry = = null ) {
prop . put ( " result " , " No Entry for URL hash " + urlhash + " ; nothing deleted. " ) ;
} else {
2006-10-19 00:25:07 +02:00
urlstring = entry . comp ( ) . url ( ) . toNormalform ( ) ;
2006-09-08 13:54:28 +02:00
prop . put ( " urlstring " , " " ) ;
2006-12-05 03:47:51 +01:00
switchboard . wordIndex . loadedURL . remove ( urlhash ) ;
2006-09-08 13:54:28 +02:00
prop . put ( " result " , " Removed URL " + urlstring ) ;
2005-04-07 21:19:42 +02:00
}
}
if ( post . containsKey ( " keystringsearch " ) ) {
2006-11-23 03:16:30 +01:00
keyhash = plasmaCondenser . word2hash ( keystring ) ;
2005-04-07 21:19:42 +02:00
prop . put ( " keyhash " , keyhash ) ;
prop . put ( " urlstring " , " " ) ;
prop . put ( " urlhash " , " " ) ;
2006-09-21 22:36:46 +02:00
prop . putAll ( genUrlList ( switchboard , keyhash , keystring ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " keyhashsearch " ) ) {
2006-11-23 03:16:30 +01:00
if ( keystring . length ( ) = = 0 | | ! plasmaCondenser . word2hash ( keystring ) . equals ( keyhash ) ) {
2005-04-07 21:19:42 +02:00
prop . put ( " keystring " , " <not possible to compute word from hash> " ) ;
2005-10-05 16:11:50 +02:00
}
2005-04-07 21:19:42 +02:00
prop . put ( " urlstring " , " " ) ;
prop . put ( " urlhash " , " " ) ;
2006-09-21 22:36:46 +02:00
prop . putAll ( genUrlList ( switchboard , keyhash , " " ) ) ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
// transfer to other peer
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " keyhashtransfer " ) ) {
2006-11-23 03:16:30 +01:00
if ( keystring . length ( ) = = 0 | | ! plasmaCondenser . word2hash ( keystring ) . equals ( keyhash ) ) {
2005-04-07 21:19:42 +02:00
prop . put ( " keystring " , " <not possible to compute word from hash> " ) ;
2005-10-05 16:11:50 +02:00
}
2006-11-24 02:12:14 +01:00
// find host & peer
String host = post . get ( " host " , " " ) ; // get host from input field
yacySeed seed = null ;
if ( host . length ( ) ! = 0 ) {
if ( host . length ( ) = = 12 ) {
// the host string is a peer hash
seed = yacyCore . seedDB . getConnected ( host ) ;
} else {
// the host string can be a host name
seed = yacyCore . seedDB . lookupByName ( host ) ;
}
} else {
host = post . get ( " hostHash " , " " ) ; // if input field is empty, get from select box
seed = yacyCore . seedDB . getConnected ( host ) ;
}
// prepare index
2005-04-07 21:19:42 +02:00
prop . put ( " urlstring " , " " ) ;
prop . put ( " urlhash " , " " ) ;
2006-05-28 03:09:31 +02:00
indexContainer index ;
2005-04-07 21:19:42 +02:00
String result ;
long starttime = System . currentTimeMillis ( ) ;
2006-12-06 13:51:46 +01:00
index = switchboard . wordIndex . getContainer ( keyhash , null , - 1 ) ;
2005-08-13 00:14:24 +02:00
// built urlCache
2006-01-30 13:42:06 +01:00
Iterator urlIter = index . entries ( ) ;
2005-08-13 00:14:24 +02:00
HashMap knownURLs = new HashMap ( ) ;
HashSet unknownURLEntries = new HashSet ( ) ;
2006-11-08 17:17:47 +01:00
indexRWIEntry iEntry ;
indexURLEntry lurl ;
2005-10-13 15:57:15 +02:00
while ( urlIter . hasNext ( ) ) {
2006-11-08 17:17:47 +01:00
iEntry = ( indexRWIEntry ) urlIter . next ( ) ;
2006-12-05 03:47:51 +01:00
lurl = switchboard . wordIndex . loadedURL . load ( iEntry . urlHash ( ) , null ) ;
2006-09-08 13:54:28 +02:00
if ( lurl = = null ) {
2006-08-02 21:59:28 +02:00
unknownURLEntries . add ( iEntry . urlHash ( ) ) ;
2006-09-07 20:24:39 +02:00
urlIter . remove ( ) ;
} else {
knownURLs . put ( iEntry . urlHash ( ) , lurl ) ;
2005-08-13 00:14:24 +02:00
}
}
2006-11-24 02:12:14 +01:00
// transport to other peer
2005-10-05 12:45:33 +02:00
String gzipBody = switchboard . getConfig ( " indexControl.gzipBody " , " false " ) ;
int timeout = ( int ) switchboard . getConfigLong ( " indexControl.timeout " , 60000 ) ;
2006-06-14 11:40:42 +02:00
HashMap resultObj = yacyClient . transferIndex (
2006-11-24 02:12:14 +01:00
seed ,
2006-05-28 03:09:31 +02:00
new indexContainer [ ] { index } ,
2005-10-05 16:11:50 +02:00
knownURLs ,
" true " . equalsIgnoreCase ( gzipBody ) ,
timeout ) ;
2006-06-14 11:40:42 +02:00
result = ( String ) resultObj . get ( " result " ) ;
2006-12-08 03:14:56 +01:00
prop . put ( " result " , ( result = = null ) ? ( " Successfully transferred " + knownURLs . size ( ) + " words in " + ( ( System . currentTimeMillis ( ) - starttime ) / 1000 ) + " seconds, " + unknownURLEntries + " URL not found " ) : result ) ;
2006-01-30 13:42:06 +01:00
index = null ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
// generate list
if ( post . containsKey ( " keyhashsimilar " ) ) {
2006-12-05 03:47:51 +01:00
final Iterator containerIt = switchboard . wordIndex . indexContainerSet ( keyhash , false , true , 256 ) . iterator ( ) ;
2006-07-26 13:21:51 +02:00
indexContainer container ;
2006-03-17 21:52:43 +01:00
int i = 0 ;
2006-09-21 22:36:46 +02:00
int rows = 0 , cols = 0 ;
prop . put ( " keyhashsimilar " , 1 ) ;
2006-07-26 13:21:51 +02:00
while ( containerIt . hasNext ( ) & & i < 256 ) {
container = ( indexContainer ) containerIt . next ( ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " keyhashsimilar_rows_ " + rows + " _cols_ " + cols + " _wordHash " , container . getWordHash ( ) ) ;
cols + + ;
if ( cols = = 8 ) {
prop . put ( " keyhashsimilar_rows_ " + rows + " _cols " , cols ) ;
cols = 0 ;
rows + + ;
}
2006-03-17 21:52:43 +01:00
i + + ;
}
2006-11-19 21:05:25 +01:00
prop . put ( " keyhashsimilar_rows_ " + rows + " _cols " , cols ) ;
prop . put ( " keyhashsimilar_rows " , rows + 1 ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " result " , " " ) ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
if ( post . containsKey ( " urlstringsearch " ) ) {
2005-04-07 21:19:42 +02:00
try {
URL url = new URL ( urlstring ) ;
2006-11-10 02:13:33 +01:00
urlhash = plasmaURL . urlHash ( url ) ;
2006-09-07 20:24:39 +02:00
prop . put ( " urlhash " , urlhash ) ;
2006-12-05 03:47:51 +01:00
indexURLEntry entry = switchboard . wordIndex . loadedURL . load ( urlhash , null ) ;
2006-09-07 20:24:39 +02:00
if ( entry = = null ) {
prop . put ( " urlstring " , " unknown url: " + urlstring ) ;
prop . put ( " urlhash " , " " ) ;
} else {
2006-09-21 22:36:46 +02:00
prop . putAll ( genUrlProfile ( switchboard , entry , urlhash ) ) ;
2006-09-07 20:24:39 +02:00
}
2005-12-15 11:31:00 +01:00
} catch ( MalformedURLException e ) {
prop . put ( " urlstring " , " bad url: " + urlstring ) ;
prop . put ( " urlhash " , " " ) ;
2005-04-07 21:19:42 +02:00
}
}
2005-10-05 16:11:50 +02:00
2005-04-07 21:19:42 +02:00
if ( post . containsKey ( " urlhashsearch " ) ) {
2006-12-05 03:47:51 +01:00
indexURLEntry entry = switchboard . wordIndex . loadedURL . load ( urlhash , null ) ;
2006-09-07 20:24:39 +02:00
if ( entry = = null ) {
prop . put ( " result " , " No Entry for URL hash " + urlhash ) ;
} else {
2006-10-19 00:25:07 +02:00
prop . put ( " urlstring " , entry . comp ( ) . url ( ) . toNormalform ( ) ) ;
2006-09-21 22:36:46 +02:00
prop . putAll ( genUrlProfile ( switchboard , entry , urlhash ) ) ;
2005-04-07 21:19:42 +02:00
}
}
2005-10-05 16:11:50 +02:00
// generate list
if ( post . containsKey ( " urlhashsimilar " ) ) {
2006-03-17 21:52:43 +01:00
try {
2006-12-05 03:47:51 +01:00
final Iterator entryIt = switchboard . wordIndex . loadedURL . entries ( true , true , urlhash ) ;
2006-07-24 18:40:59 +02:00
StringBuffer result = new StringBuffer ( " Sequential List of URL-Hashes:<br> " ) ;
2006-11-08 17:17:47 +01:00
indexURLEntry entry ;
2006-03-17 21:52:43 +01:00
int i = 0 ;
2006-09-21 22:36:46 +02:00
int rows = 0 , cols = 0 ;
prop . put ( " urlhashsimilar " , 1 ) ;
2006-07-24 18:40:59 +02:00
while ( entryIt . hasNext ( ) & & i < 256 ) {
2006-11-08 17:17:47 +01:00
entry = ( indexURLEntry ) entryIt . next ( ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " urlhashsimilar_rows_ " + rows + " _cols_ " + cols + " _urlHash " , entry . hash ( ) ) ;
cols + + ;
if ( cols = = 8 ) {
prop . put ( " urlhashsimilar_rows_ " + rows + " _cols " , cols ) ;
cols = 0 ;
rows + + ;
}
2006-03-17 21:52:43 +01:00
i + + ;
}
2006-09-21 22:36:46 +02:00
prop . put ( " urlhashsimilar_rows " , rows ) ;
2006-03-17 21:52:43 +01:00
prop . put ( " result " , result . toString ( ) ) ;
} catch ( IOException e ) {
prop . put ( " result " , " No Entries for URL hash " + urlhash ) ;
2005-04-07 21:19:42 +02:00
}
}
2005-10-05 16:11:50 +02:00
// list known hosts
yacySeed seed ;
2005-04-07 21:19:42 +02:00
int hc = 0 ;
2005-10-05 16:11:50 +02:00
if ( yacyCore . seedDB ! = null & & yacyCore . seedDB . sizeConnected ( ) > 0 ) {
Enumeration e = yacyCore . dhtAgent . getAcceptRemoteIndexSeeds ( keyhash ) ;
while ( e . hasMoreElements ( ) ) {
seed = ( yacySeed ) e . nextElement ( ) ;
2005-09-13 07:46:55 +02:00
if ( seed ! = null ) {
prop . put ( " hosts_ " + hc + " _hosthash " , seed . hash ) ;
2006-11-30 01:23:07 +01:00
prop . put ( " hosts_ " + hc + " _hostname " , seed . hash + " " + seed . get ( yacySeed . NAME , " nameless " ) ) ;
2005-09-13 07:46:55 +02:00
hc + + ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
}
2005-08-03 15:43:55 +02:00
prop . put ( " hosts " , Integer . toString ( hc ) ) ;
2005-10-05 16:11:50 +02:00
} else {
2005-04-07 21:19:42 +02:00
prop . put ( " hosts " , " 0 " ) ;
2005-10-05 16:11:50 +02:00
}
2005-04-07 21:19:42 +02:00
// insert constants
2005-08-03 15:43:55 +02:00
prop . put ( " wcount " , Integer . toString ( switchboard . wordIndex . size ( ) ) ) ;
2006-12-05 03:47:51 +01:00
prop . put ( " ucount " , Integer . toString ( switchboard . wordIndex . loadedURL . size ( ) ) ) ;
2006-12-22 01:53:40 +01:00
prop . put ( " indexDistributeChecked " , ( switchboard . getConfig ( " allowDistributeIndex " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexDistributeWhileCrawling " , ( switchboard . getConfig ( " allowDistributeIndexWhileCrawling " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexReceiveChecked " , ( switchboard . getConfig ( " allowReceiveIndex " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
prop . put ( " indexReceiveBlockBlacklistChecked " , ( switchboard . getConfig ( " indexReceiveBlockBlacklist " , " true " ) . equals ( " true " ) ) ? 1 : 0 ) ;
2006-10-10 22:09:26 +02:00
prop . put ( " peertags " , serverCodings . set2string ( yacyCore . seedDB . mySeed . getPeerTags ( ) , " , " , false ) ) ;
2005-04-07 21:19:42 +02:00
// return rewrite properties
2005-10-05 16:11:50 +02:00
return prop ;
2005-04-07 21:19:42 +02:00
}
2006-11-08 17:17:47 +01:00
public static serverObjects genUrlProfile ( plasmaSwitchboard switchboard , indexURLEntry entry , String urlhash ) {
2006-09-21 22:36:46 +02:00
serverObjects prop = new serverObjects ( ) ;
if ( entry = = null ) {
prop . put ( " genUrlProfile " , 1 ) ;
prop . put ( " genUrlProfile_urlhash " , urlhash ) ;
return prop ;
}
2006-11-08 17:17:47 +01:00
indexURLEntry . Components comp = entry . comp ( ) ;
2005-12-11 01:25:02 +01:00
String referrer = null ;
2006-12-05 03:47:51 +01:00
indexURLEntry le = switchboard . wordIndex . loadedURL . load ( entry . referrerHash ( ) , null ) ;
2006-09-07 20:24:39 +02:00
if ( le = = null ) {
2005-12-11 01:25:02 +01:00
referrer = " <unknown> " ;
2006-09-07 20:24:39 +02:00
} else {
2006-10-19 00:25:07 +02:00
referrer = le . comp ( ) . url ( ) . toNormalform ( ) ;
2005-12-11 01:25:02 +01:00
}
2006-10-19 00:25:07 +02:00
if ( comp . url ( ) = = null ) {
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlProfile " , 1 ) ;
prop . put ( " genUrlProfile_urlhash " , urlhash ) ;
return prop ;
}
prop . put ( " genUrlProfile " , 2 ) ;
2006-10-19 00:25:07 +02:00
prop . put ( " genUrlProfile_urlNormalform " , comp . url ( ) . toNormalform ( ) ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlProfile_urlhash " , urlhash ) ;
2006-10-19 00:25:07 +02:00
prop . put ( " genUrlProfile_urlDescr " , comp . descr ( ) ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlProfile_moddate " , entry . moddate ( ) ) ;
prop . put ( " genUrlProfile_loaddate " , entry . loaddate ( ) ) ;
prop . put ( " genUrlProfile_referrer " , referrer ) ;
prop . put ( " genUrlProfile_doctype " , " " + entry . doctype ( ) ) ;
prop . put ( " genUrlProfile_language " , entry . language ( ) ) ;
prop . put ( " genUrlProfile_size " , entry . size ( ) ) ;
prop . put ( " genUrlProfile_wordCount " , entry . wordCount ( ) ) ;
return prop ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
2006-09-21 22:36:46 +02:00
public static serverObjects genUrlList ( plasmaSwitchboard switchboard , String keyhash , String keystring ) {
2005-04-07 21:19:42 +02:00
// search for a word hash and generate a list of url links
2006-09-21 22:36:46 +02:00
serverObjects prop = new serverObjects ( ) ;
2006-05-28 03:09:31 +02:00
indexContainer index = null ;
2005-04-07 21:19:42 +02:00
try {
2006-12-06 13:51:46 +01:00
index = switchboard . wordIndex . getContainer ( keyhash , null , - 1 ) ;
2005-10-05 16:11:50 +02:00
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList_keyHash " , keyhash ) ;
2006-11-19 21:05:25 +01:00
if ( ( index = = null ) | | ( index . size ( ) = = 0 ) ) {
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList " , 1 ) ;
2006-11-24 02:12:14 +01:00
prop . put ( " genUrlList_count " , 0 ) ;
2005-04-07 21:19:42 +02:00
} else {
2006-01-30 01:42:38 +01:00
final Iterator en = index . entries ( ) ;
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList " , 2 ) ;
2006-01-19 15:13:39 +01:00
String us ;
String uh [ ] = new String [ 2 ] ;
2005-04-07 21:19:42 +02:00
int i = 0 ;
2005-10-05 16:11:50 +02:00
final TreeMap tm = new TreeMap ( ) ;
2006-11-08 17:17:47 +01:00
indexRWIEntry xi ;
2005-10-13 15:57:15 +02:00
while ( en . hasNext ( ) ) {
2006-11-08 17:17:47 +01:00
xi = ( indexRWIEntry ) en . next ( ) ;
2006-07-24 00:39:41 +02:00
uh = new String [ ] { xi . urlHash ( ) , Integer . toString ( xi . posintext ( ) ) } ;
2006-12-05 03:47:51 +01:00
indexURLEntry le = switchboard . wordIndex . loadedURL . load ( uh [ 0 ] , null ) ;
2006-09-07 20:24:39 +02:00
if ( le = = null ) {
2006-01-19 15:13:39 +01:00
tm . put ( uh [ 0 ] , uh ) ;
2006-09-07 20:24:39 +02:00
} else {
2006-10-19 00:25:07 +02:00
us = le . comp ( ) . url ( ) . toNormalform ( ) ;
2006-09-07 20:24:39 +02:00
tm . put ( us , uh ) ;
2005-10-05 16:11:50 +02:00
}
}
2006-01-24 14:28:50 +01:00
URL url ;
2005-10-05 16:11:50 +02:00
final Iterator iter = tm . keySet ( ) . iterator ( ) ;
while ( iter . hasNext ( ) ) {
us = iter . next ( ) . toString ( ) ;
2006-01-19 15:13:39 +01:00
uh = ( String [ ] ) tm . get ( us ) ;
if ( us . equals ( uh [ 0 ] ) ) {
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList_urlList_ " + i + " _urlExists " , 0 ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlhxCount " , i ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlhxValue " , uh [ 0 ] ) ;
2005-12-26 20:41:55 +01:00
} else {
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList_urlList_ " + i + " _urlExists " , 1 ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlhxCount " , i ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlhxValue " , uh [ 0 ] ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_keyString " , keystring ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_keyHash " , keyhash ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlString " , us ) ;
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_pos " , uh [ 1 ] ) ;
2006-01-24 14:28:50 +01:00
url = new URL ( us ) ;
2006-08-12 04:42:10 +02:00
if ( plasmaSwitchboard . urlBlacklist . isListed ( plasmaURLPattern . BLACKLIST_DHT , url ) ) {
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList_urlList_ " + i + " _urlExists_urlhxChecked " , 1 ) ;
2006-01-24 14:28:50 +01:00
}
2005-04-07 21:19:42 +02:00
}
2006-09-21 22:36:46 +02:00
i + + ;
2005-04-07 21:19:42 +02:00
}
2006-09-21 22:36:46 +02:00
prop . put ( " genUrlList_urlList " , i ) ;
prop . put ( " genUrlList_keyString " , keystring ) ;
2006-11-24 02:12:14 +01:00
prop . put ( " genUrlList_count " , i ) ;
2005-04-07 21:19:42 +02:00
}
2005-10-05 16:11:50 +02:00
index = null ;
2006-09-21 22:36:46 +02:00
return prop ;
2005-04-07 21:19:42 +02:00
} catch ( IOException e ) {
2006-09-21 22:36:46 +02:00
return prop ;
2005-10-05 12:45:33 +02:00
} finally {
2006-01-30 01:42:38 +01:00
if ( index ! = null ) index = null ;
2005-04-07 21:19:42 +02:00
}
}
2005-10-05 16:11:50 +02:00
2006-09-07 16:32:46 +02:00
}