2011-04-08 23:15:10 +02:00
// WorkTables.java
2010-02-04 12:26:23 +01:00
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.02.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
2011-03-08 02:51:51 +01:00
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
2010-02-04 12:26:23 +01:00
//
// LICENSE
2012-07-27 12:13:53 +02:00
//
2010-02-04 12:26:23 +01:00
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2012-09-21 15:48:16 +02:00
package net.yacy.data ;
2010-02-04 12:26:23 +01:00
import java.io.File ;
import java.io.IOException ;
2014-10-04 04:11:48 +02:00
import java.net.MalformedURLException ;
2012-12-22 16:27:14 +01:00
import java.text.ParseException ;
import java.text.SimpleDateFormat ;
2010-08-19 14:13:54 +02:00
import java.util.ArrayList ;
import java.util.Collection ;
2010-02-04 12:26:23 +01:00
import java.util.Date ;
2015-05-11 14:42:21 +02:00
import java.util.HashMap ;
2011-01-03 21:52:54 +01:00
import java.util.Iterator ;
2010-08-19 14:13:54 +02:00
import java.util.LinkedHashMap ;
import java.util.Map ;
2011-01-03 21:52:54 +01:00
import java.util.TreeMap ;
2010-02-04 12:26:23 +01:00
2017-03-30 09:22:28 +02:00
import org.apache.http.Header ;
import org.apache.http.HttpStatus ;
2015-05-11 14:42:21 +02:00
import org.apache.http.entity.mime.content.ContentBody ;
2011-01-03 21:52:54 +01:00
import net.yacy.cora.date.GenericFormatter ;
2013-09-15 00:30:23 +02:00
import net.yacy.cora.document.encoding.ASCII ;
import net.yacy.cora.document.encoding.UTF8 ;
import net.yacy.cora.document.id.DigestURL ;
2014-10-04 04:11:48 +02:00
import net.yacy.cora.document.id.MultiProtocolURL ;
2012-09-21 16:46:57 +02:00
import net.yacy.cora.order.Base64Order ;
2012-10-28 19:56:02 +01:00
import net.yacy.cora.protocol.ClientIdentification ;
2017-03-30 09:22:28 +02:00
import net.yacy.cora.protocol.HeaderFramework ;
2010-08-23 00:32:39 +02:00
import net.yacy.cora.protocol.http.HTTPClient ;
2012-07-27 12:13:53 +02:00
import net.yacy.cora.storage.HandleSet ;
2013-07-09 14:28:25 +02:00
import net.yacy.cora.util.ConcurrentLog ;
2012-07-27 12:13:53 +02:00
import net.yacy.cora.util.SpaceExceededException ;
2010-02-04 12:26:23 +01:00
import net.yacy.kelondro.blob.Tables ;
2010-12-06 15:34:58 +01:00
import net.yacy.kelondro.data.word.WordReference ;
import net.yacy.kelondro.rwi.IndexCell ;
2011-09-25 18:59:06 +02:00
import net.yacy.search.Switchboard ;
2012-09-21 15:48:16 +02:00
import net.yacy.server.serverObjects ;
2010-02-04 12:26:23 +01:00
public class WorkTables extends Tables {
2012-07-27 12:13:53 +02:00
2010-02-04 12:26:23 +01:00
public final static String TABLE_API_NAME = " api " ;
public final static String TABLE_API_TYPE_STEERING = " steering " ;
public final static String TABLE_API_TYPE_CONFIGURATION = " configuration " ;
public final static String TABLE_API_TYPE_CRAWLER = " crawler " ;
2013-04-30 02:11:28 +02:00
public final static String TABLE_API_TYPE_DELETION = " deletion " ;
2016-02-24 15:13:20 +01:00
public final static String TABLE_API_TYPE_DUMP = " dump " ;
2012-07-27 12:13:53 +02:00
2010-02-04 12:26:23 +01:00
public final static String TABLE_API_COL_TYPE = " type " ;
public final static String TABLE_API_COL_COMMENT = " comment " ;
2010-08-18 17:56:38 +02:00
public final static String TABLE_API_COL_DATE_RECORDING = " date_recording " ; // if not present default to old date field
public final static String TABLE_API_COL_DATE_LAST_EXEC = " date_last_exec " ; // if not present default to old date field
public final static String TABLE_API_COL_DATE_NEXT_EXEC = " date_next_exec " ; // if not present default to zero
public final static String TABLE_API_COL_DATE = " date " ; // old date; do not set in new records
2010-02-04 12:26:23 +01:00
public final static String TABLE_API_COL_URL = " url " ;
2010-08-18 17:56:38 +02:00
public final static String TABLE_API_COL_APICALL_PK = " apicall_pk " ; // the primary key for the table entry of that api call (not really a database field, only a name in the apicall)
public final static String TABLE_API_COL_APICALL_COUNT = " apicall_count " ; // counts how often the API was called (starts with 1)
public final static String TABLE_API_COL_APICALL_SCHEDULE_TIME = " apicall_schedule_time " ; // factor for SCHEULE_UNIT time units
2012-12-22 16:27:14 +01:00
public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT = " apicall_schedule_unit " ; // may be 'minutes', 'hours', 'days'
2016-08-09 03:03:04 +02:00
public final static String TABLE_API_COL_APICALL_EVENT_KIND = " apicall_event_kind " ; //
2021-09-16 22:23:51 +02:00
public final static String TABLE_API_COL_APICALL_EVENT_ACTION = " apicall_event_action " ; //
2010-08-31 17:47:47 +02:00
2010-03-04 12:58:07 +01:00
public final static String TABLE_ROBOTS_NAME = " robots " ;
2012-07-27 12:13:53 +02:00
2010-08-31 17:47:47 +02:00
public final static String TABLE_ACTIVECRAWLS_NAME = " crawljobsActive " ;
public final static String TABLE_PASSIVECRAWLS_NAME = " crawljobsPassive " ;
2010-12-06 15:34:58 +01:00
2012-07-27 12:13:53 +02:00
2010-04-13 03:16:09 +02:00
/**
 * Creates the work tables on top of the given location.
 *
 * @param workPath location handed to the {@link Tables} super constructor
 */
public WorkTables(final File workPath) {
    // the second argument is interpreted by the Tables constructor
    super(workPath, 12);
}
2021-09-16 22:23:51 +02:00
2010-08-19 14:13:54 +02:00
/ * *
2021-09-16 22:23:51 +02:00
*
2018-01-10 17:05:53 +01:00
* @param post the api call eventual request parameters .
* @param servletName the name of the servlet . Must not be null .
* @return the API URL to be recorded , formatted to include request parameters as URL query parameters
2010-08-19 14:13:54 +02:00
* /
2017-05-03 18:53:01 +02:00
public static String generateRecordedURL ( final serverObjects post , final String servletName ) {
2021-09-16 22:23:51 +02:00
/ * Before API URL serialization , we set any eventual transaction token value to empty :
* this will later help identify a new valid transaction token will be necessary ,
2018-01-10 17:05:53 +01:00
* but prevents revealing it in the URL displayed in the process scheduler and prevents storing an outdated value * /
final String transactionToken ;
if ( post ! = null ) {
transactionToken = post . get ( TransactionManager . TRANSACTION_TOKEN_PARAM ) ;
} else {
transactionToken = null ;
}
if ( transactionToken ! = null & & post ! = null ) {
2017-03-30 09:22:28 +02:00
post . put ( TransactionManager . TRANSACTION_TOKEN_PARAM , " " ) ;
}
2021-09-16 22:23:51 +02:00
2010-08-18 17:56:38 +02:00
// generate the apicall url - without the apicall attributes
2018-01-10 17:05:53 +01:00
String apiurl = " / " + servletName ;
if ( post ! = null ) {
apiurl + = " ? " + post . toString ( ) ;
}
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
/* Now restore the eventual transaction token to prevent side effects on the post object eventually still used by the caller */
2018-01-10 17:05:53 +01:00
if ( post ! = null ) {
if ( transactionToken ! = null ) {
post . put ( TransactionManager . TRANSACTION_TOKEN_PARAM , transactionToken ) ;
} else {
post . remove ( TransactionManager . TRANSACTION_TOKEN_PARAM ) ;
}
2017-03-30 09:22:28 +02:00
}
2021-09-16 22:23:51 +02:00
2017-05-03 18:53:01 +02:00
return apiurl ;
}
2021-09-16 22:23:51 +02:00
2018-01-10 17:05:53 +01:00
/ * *
* @param servletName the servlet name used to identify the API when the call is recorded .
* @param post Servlet request parameters . Must not be null .
* @param sb the { @link Switchboard } instance . Must not be null .
* @return the most recently recorded call to the given API with the same parameters , or null when no one was found or data is not accessible
* /
public static Row selectLastExecutedApiCall ( final String servletName , final serverObjects post , final Switchboard sb ) {
Row lastRecordedCall = null ;
if ( servletName ! = null & & sb ! = null & & sb . tables ! = null ) {
try {
if ( post ! = null & & post . containsKey ( WorkTables . TABLE_API_COL_APICALL_PK ) ) {
/ *
* Search the table on the primary key when when present ( re - execution of a
* recorded call )
* /
lastRecordedCall = sb . tables . select ( WorkTables . TABLE_API_NAME ,
UTF8 . getBytes ( post . get ( WorkTables . TABLE_API_COL_APICALL_PK ) ) ) ;
} else {
/* Else search the table on the API URL as recorded (including parameters) */
final String apiURL = WorkTables . generateRecordedURL ( post , servletName ) ;
final Iterator < Row > rowsIt = sb . tables . iterator ( WorkTables . TABLE_API_NAME ,
WorkTables . TABLE_API_COL_URL , UTF8 . getBytes ( apiURL ) ) ;
while ( rowsIt . hasNext ( ) ) {
final Row currentRow = rowsIt . next ( ) ;
if ( currentRow ! = null ) {
final Date currentLastExec = currentRow . get ( WorkTables . TABLE_API_COL_DATE_LAST_EXEC ,
( Date ) null ) ;
if ( currentLastExec ! = null ) {
if ( lastRecordedCall = = null ) {
/ *
* Do not break now the loop : we are looking for the most recent API call on
* the same URL
* /
lastRecordedCall = currentRow ;
} else if ( lastRecordedCall . get ( WorkTables . TABLE_API_COL_DATE_LAST_EXEC , ( Date ) null )
. before ( currentLastExec ) ) {
lastRecordedCall = currentRow ;
}
}
}
}
}
} catch ( final IOException e ) {
ConcurrentLog . logException ( e ) ;
} catch ( final SpaceExceededException e ) {
ConcurrentLog . logException ( e ) ;
}
}
return lastRecordedCall ;
}
2017-05-03 18:53:01 +02:00
/ * *
* recording of a api call . stores the call parameters into the API database table
* @param post the post arguments of the api call . Must not be null .
* @param servletName the name of the servlet
* @param type name of the servlet category
* @param comment visual description of the process
* @return the pk of the new entry in the api table
* /
public byte [ ] recordAPICall ( final serverObjects post , final String servletName , final String type , final String comment ) {
// remove the apicall attributes from the post object
String [ ] pks = post . remove ( TABLE_API_COL_APICALL_PK ) ;
2021-09-16 22:23:51 +02:00
2017-05-03 18:53:01 +02:00
byte [ ] pk = pks = = null ? null : UTF8 . getBytes ( pks [ 0 ] ) ;
// generate the apicall url - without the apicall attributes
final String apiurl = generateRecordedURL ( post , servletName ) ;
2010-08-18 17:56:38 +02:00
// read old entry from the apicall table (if exists)
Row row = null ;
2010-02-04 12:26:23 +01:00
try {
2010-08-26 18:01:45 +02:00
row = ( pk = = null ) ? null : super . select ( TABLE_API_NAME , pk ) ;
2013-07-17 18:31:30 +02:00
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2013-07-17 18:31:30 +02:00
} catch ( final SpaceExceededException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2010-08-18 17:56:38 +02:00
}
2012-07-27 12:13:53 +02:00
2010-08-18 17:56:38 +02:00
// insert or update entry
try {
2010-08-20 01:52:38 +02:00
if ( row = = null ) {
2010-08-18 17:56:38 +02:00
// create and insert new entry
Data data = new Data ( ) ;
2011-04-12 07:02:36 +02:00
data . put ( TABLE_API_COL_TYPE , UTF8 . getBytes ( type ) ) ;
data . put ( TABLE_API_COL_COMMENT , UTF8 . getBytes ( comment ) ) ;
byte [ ] date = UTF8 . getBytes ( GenericFormatter . SHORT_MILSEC_FORMATTER . format ( ) ) ;
2010-08-18 17:56:38 +02:00
data . put ( TABLE_API_COL_DATE_RECORDING , date ) ;
data . put ( TABLE_API_COL_DATE_LAST_EXEC , date ) ;
2011-04-12 07:02:36 +02:00
data . put ( TABLE_API_COL_URL , UTF8 . getBytes ( apiurl ) ) ;
2012-07-27 12:13:53 +02:00
// insert APICALL attributes
2010-08-20 01:52:38 +02:00
data . put ( TABLE_API_COL_APICALL_COUNT , " 1 " ) ;
2010-08-26 18:01:45 +02:00
pk = super . insert ( TABLE_API_NAME , data ) ;
2010-08-20 01:52:38 +02:00
} else {
// modify and update existing entry
// modify date attributes and patch old values
2011-04-12 07:02:36 +02:00
row . put ( TABLE_API_COL_DATE_LAST_EXEC , UTF8 . getBytes ( GenericFormatter . SHORT_MILSEC_FORMATTER . format ( ) ) ) ;
2010-08-20 01:52:38 +02:00
if ( ! row . containsKey ( TABLE_API_COL_DATE_RECORDING ) ) row . put ( TABLE_API_COL_DATE_RECORDING , row . get ( TABLE_API_COL_DATE ) ) ;
row . remove ( TABLE_API_COL_DATE ) ;
2012-07-27 12:13:53 +02:00
// insert APICALL attributes
2010-08-20 01:52:38 +02:00
row . put ( TABLE_API_COL_APICALL_COUNT , row . get ( TABLE_API_COL_APICALL_COUNT , 1 ) + 1 ) ;
2016-08-09 03:03:04 +02:00
calculateAPIScheduler ( row , false ) ; // set next execution time (as this might be a forward existing entry with schedule data)
2010-08-20 01:52:38 +02:00
super . update ( TABLE_API_NAME , row ) ;
2010-08-26 18:01:45 +02:00
assert pk ! = null ;
2010-08-18 17:56:38 +02:00
}
2013-07-17 18:31:30 +02:00
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2013-07-17 18:31:30 +02:00
} catch ( final SpaceExceededException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2010-02-04 12:26:23 +01:00
}
2013-07-09 14:28:25 +02:00
ConcurrentLog . info ( " APICALL " , apiurl ) ;
2010-08-26 18:01:45 +02:00
return pk ;
2010-02-04 12:26:23 +01:00
}
2012-07-27 12:13:53 +02:00
2010-08-20 01:52:38 +02:00
/ * *
* store a API call and set attributes to schedule a re - call of that API call according to a given frequence
2016-08-08 01:57:31 +02:00
* This is the same as the previous method but it also computes a re - call time and stores that additionally
2010-08-20 01:52:38 +02:00
* @param post the post arguments of the api call
* @param servletName the name of the servlet
* @param type name of the servlet category
* @param comment visual description of the process
* @param time the time until next scheduled execution of this api call
* @param unit the time unit for the scheduled call
2010-08-26 18:01:45 +02:00
* @return the pk of the new entry in the api table
2010-08-20 01:52:38 +02:00
* /
2010-08-26 18:01:45 +02:00
public byte [ ] recordAPICall ( final serverObjects post , final String servletName , final String type , final String comment , int time , String unit ) {
2010-08-20 01:52:38 +02:00
if ( post . containsKey ( TABLE_API_COL_APICALL_PK ) ) {
// this api call has already been stored somewhere.
2010-08-26 18:01:45 +02:00
return recordAPICall ( post , servletName , type , comment ) ;
2010-08-20 01:52:38 +02:00
}
2012-07-10 22:59:03 +02:00
if ( time < 0 | | unit = = null | | unit . isEmpty ( ) | | " minutes,hours,days " . indexOf ( unit ) < 0 ) {
2010-08-20 01:52:38 +02:00
time = 0 ; unit = " " ;
} else {
if ( unit . equals ( " minutes " ) & & time < 10 ) time = 10 ;
}
2012-07-27 12:13:53 +02:00
2021-09-16 22:23:51 +02:00
/ * Before API URL serialization , we set any eventual transaction token value to empty :
* this will later help identify a new valid transaction token will be necessary ,
2017-03-30 09:22:28 +02:00
* but without revealing it in the URL displayed in the process scheduler and storing an invalid value * /
final String transactionToken = post . get ( TransactionManager . TRANSACTION_TOKEN_PARAM ) ;
if ( transactionToken ! = null ) {
post . put ( TransactionManager . TRANSACTION_TOKEN_PARAM , " " ) ;
}
2021-09-16 22:23:51 +02:00
2010-08-20 01:52:38 +02:00
// generate the apicall url - without the apicall attributes
2011-01-28 11:54:13 +01:00
final String apiurl = /*"http://localhost:" + getConfig("port", "8090") +*/ " / " + servletName + " ? " + post . toString ( ) ;
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
/* Now restore the eventual transaction token to prevent side effects on the post object eventually still used by the caller */
if ( transactionToken ! = null ) {
post . put ( TransactionManager . TRANSACTION_TOKEN_PARAM , transactionToken ) ;
} else {
post . remove ( TransactionManager . TRANSACTION_TOKEN_PARAM ) ;
}
2021-09-16 22:23:51 +02:00
2010-08-26 18:01:45 +02:00
byte [ ] pk = null ;
2010-08-20 01:52:38 +02:00
// insert entry
try {
// create and insert new entry
Data data = new Data ( ) ;
2011-04-12 07:02:36 +02:00
data . put ( TABLE_API_COL_TYPE , UTF8 . getBytes ( type ) ) ;
data . put ( TABLE_API_COL_COMMENT , UTF8 . getBytes ( comment ) ) ;
2014-09-17 12:54:50 +02:00
byte [ ] date = ASCII . getBytes ( GenericFormatter . SHORT_MILSEC_FORMATTER . format ( ) ) ;
2010-08-20 01:52:38 +02:00
data . put ( TABLE_API_COL_DATE_RECORDING , date ) ;
2016-08-08 01:57:31 +02:00
data . put ( TABLE_API_COL_DATE_LAST_EXEC , date ) ;
2011-04-12 07:02:36 +02:00
data . put ( TABLE_API_COL_URL , UTF8 . getBytes ( apiurl ) ) ;
2012-07-27 12:13:53 +02:00
// insert APICALL attributes
2016-08-08 01:57:31 +02:00
data . put ( TABLE_API_COL_APICALL_COUNT , UTF8 . getBytes ( " 1 " ) ) ;
2012-08-26 17:46:40 +02:00
data . put ( TABLE_API_COL_APICALL_SCHEDULE_TIME , ASCII . getBytes ( Integer . toString ( time ) ) ) ;
2011-04-12 07:02:36 +02:00
data . put ( TABLE_API_COL_APICALL_SCHEDULE_UNIT , UTF8 . getBytes ( unit ) ) ;
2016-08-08 01:57:31 +02:00
calculateAPIScheduler ( data , false ) ; // set next execution time
2010-08-26 18:01:45 +02:00
pk = super . insert ( TABLE_API_NAME , data ) ;
2013-07-17 18:31:30 +02:00
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2013-07-17 18:31:30 +02:00
} catch ( final SpaceExceededException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2010-08-20 01:52:38 +02:00
}
2013-07-09 14:28:25 +02:00
ConcurrentLog . info ( " APICALL " , apiurl ) ;
2010-08-26 18:01:45 +02:00
return pk ;
2010-08-20 01:52:38 +02:00
}
2012-07-27 12:13:53 +02:00
2010-08-19 14:13:54 +02:00
/ * *
* execute an API call using a api table row which contains all essentials
2014-01-05 17:43:34 +01:00
* to access the server also the host and port must be given
2010-08-19 14:13:54 +02:00
* @param pks a collection of primary keys denoting the rows in the api table
* @param host the host where the api shall be called
* @param port the port on the host
* @return a map of the called urls and the http status code of the api call or - 1 if any other IOException occurred
* /
2014-01-07 21:26:50 +01:00
public Map < String , Integer > execAPICalls ( String host , int port , Collection < String > pks , final String username , final String pass ) {
2010-08-19 14:13:54 +02:00
LinkedHashMap < String , Integer > l = new LinkedHashMap < String , Integer > ( ) ;
2021-10-31 18:47:10 +01:00
// now call the api URLs and store the result status
try ( final HTTPClient client = new HTTPClient ( ClientIdentification . yacyInternetCrawlerAgent ) ) {
client . setTimout ( 120000 ) ;
Tables . Row row ;
for ( final String pk : pks ) {
row = null ;
try {
row = select ( WorkTables . TABLE_API_NAME , UTF8 . getBytes ( pk ) ) ;
} catch ( final IOException e ) {
ConcurrentLog . logException ( e ) ;
} catch ( final SpaceExceededException e ) {
ConcurrentLog . logException ( e ) ;
}
if ( row = = null ) continue ;
String theapicall = UTF8 . String ( row . get ( WorkTables . TABLE_API_COL_URL ) ) + " & " + WorkTables . TABLE_API_COL_APICALL_PK + " = " + UTF8 . String ( row . getPK ( ) ) ;
try {
MultiProtocolURL url = new MultiProtocolURL ( " http " , host , port , theapicall ) ;
final Map < String , String > attributes = url . getAttributes ( ) ;
final boolean isTokenProtectedAPI = attributes . containsKey ( TransactionManager . TRANSACTION_TOKEN_PARAM ) ;
// use 4 param MultiProtocolURL to allow api_row_url with searchpart (like url?p=a&p2=b ) in client.GETbytes()
if ( theapicall . length ( ) > 1000 | | isTokenProtectedAPI ) {
// use a POST to execute the call
execPostAPICall ( host , port , username , pass , client , l , url , isTokenProtectedAPI ) ;
} else {
// use a GET to execute the call
ConcurrentLog . info ( " WorkTables " , " executing url: " + url . toNormalform ( true ) ) ;
try {
client . GETbytes ( url , username , pass , false ) ; // use GETbytes(MultiProtocolURL,..) form to allow url in parameter (&url=path%
if ( client . getStatusCode ( ) = = HttpStatus . SC_METHOD_NOT_ALLOWED ) {
/ * GET method not allowed ( HTTP 450 status ) : this may be an old API entry ,
* now restricted to HTTP POST and requiring a transaction token . We try now with POST . * /
execPostAPICall ( host , port , username , pass , client , l , url , true ) ;
} else {
l . put ( url . toNormalform ( true ) , client . getStatusCode ( ) ) ;
}
} catch ( final IOException e ) {
ConcurrentLog . logException ( e ) ;
l . put ( url . toString ( ) , - 1 ) ;
2017-03-30 09:22:28 +02:00
}
2015-05-11 14:42:21 +02:00
}
2021-10-31 18:47:10 +01:00
} catch ( MalformedURLException ex ) {
ConcurrentLog . warn ( " APICALL " , " wrong url in apicall " + theapicall ) ;
2014-10-04 04:11:48 +02:00
}
2010-08-19 14:13:54 +02:00
}
2021-10-31 18:47:10 +01:00
} catch ( IOException e ) {
ConcurrentLog . logException ( e ) ;
2010-08-19 14:13:54 +02:00
}
return l ;
}
2017-03-30 09:22:28 +02:00
/ * *
* Executes an API call using HTTP POST method to the YaCy peer with the given parameters
* @param host the peer host name
* @param port the peer port
* @param username authentication user name
* @param pass authentication encoded password
* @param client the HTTP client to use
* @param results the results map to update
* @param apiURL the full API URL with all parameters
* @param isTokenProtectedAPI set to true when the API is protected by a transaction token
* @throws MalformedURLException when the HTTP POST url could not be derived from apiURL
* /
private void execPostAPICall ( String host , int port , final String username , final String pass ,
2021-09-16 22:23:51 +02:00
final HTTPClient client , final LinkedHashMap < String , Integer > results ,
2017-03-30 09:22:28 +02:00
final MultiProtocolURL apiURL , final boolean isTokenProtectedAPI ) throws MalformedURLException {
Map < String , ContentBody > post = new HashMap < > ( ) ;
for ( Map . Entry < String , String > a : apiURL . getAttributes ( ) . entrySet ( ) ) {
post . put ( a . getKey ( ) , UTF8 . StringBody ( a . getValue ( ) ) ) ;
}
final MultiProtocolURL url = new MultiProtocolURL ( " http " , host , port , apiURL . getPath ( ) ) ;
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
try {
if ( isTokenProtectedAPI ) {
// Eventually acquire first a new valid transaction token before posting data
client . GETbytes ( url , username , pass , false ) ;
if ( client . getStatusCode ( ) ! = HttpStatus . SC_OK ) {
2021-09-16 22:23:51 +02:00
/ * Do not fail immediately , the token may be no more necessary on this API :
2017-03-30 09:22:28 +02:00
* let ' s log a warning but try anyway the POST call that will eventually reject the request * /
ConcurrentLog . warn ( " APICALL " , " Could not retrieve a transaction token for " + apiURL . toNormalform ( true ) ) ;
} else {
final Header transactionTokenHeader = client . getHttpResponse ( )
. getFirstHeader ( HeaderFramework . X_YACY_TRANSACTION_TOKEN ) ;
if ( transactionTokenHeader = = null ) {
/ *
* Do not fail immediately , the token may be no more
* necessary on this API : let ' s log a warning but try
* anyway the POST call that will eventually reject the
* request
* /
ConcurrentLog . warn ( " APICALL " ,
" Could not retrieve a transaction token for " + apiURL . toNormalform ( true ) ) ;
} else {
post . put ( TransactionManager . TRANSACTION_TOKEN_PARAM ,
UTF8 . StringBody ( transactionTokenHeader . getValue ( ) ) ) ;
}
}
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
}
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
client . POSTbytes ( url , " localhost " , post , username , pass , false , false ) ;
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
results . put ( apiURL . toNormalform ( true ) , client . getStatusCode ( ) ) ;
} catch ( final IOException e ) {
ConcurrentLog . logException ( e ) ;
results . put ( apiURL . toNormalform ( true ) , - 1 ) ;
}
}
2021-09-16 22:23:51 +02:00
2017-03-30 09:22:28 +02:00
/ * *
2021-09-16 22:23:51 +02:00
* Executes an HTTP GET API call
2017-03-30 09:22:28 +02:00
* @param host target host name
* @param port target port
* @param path target path
* @param pk the primary key of the api call
* @param username authentication user name
* @param pass authentication encoded password
* @return the API response HTTP status , or - 1 when an error occured
* /
public static int execGetAPICall ( String host , int port , String path , byte [ ] pk , final String username , final String pass ) {
2010-09-28 14:18:54 +02:00
// now call the api URLs and store the result status
String url = " http:// " + host + " : " + port + path ;
2011-03-07 21:36:40 +01:00
if ( pk ! = null ) url + = " & " + WorkTables . TABLE_API_COL_APICALL_PK + " = " + UTF8 . String ( pk ) ;
2021-10-31 18:47:10 +01:00
try ( final HTTPClient client = new HTTPClient ( ClientIdentification . yacyInternetCrawlerAgent ) ) {
client . setTimout ( 120000 ) ;
2014-01-19 15:21:23 +01:00
client . GETbytes ( url , username , pass , false ) ;
2010-09-28 14:18:54 +02:00
return client . getStatusCode ( ) ;
2013-07-17 18:31:30 +02:00
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2010-09-28 14:18:54 +02:00
return - 1 ;
}
}
2012-07-27 12:13:53 +02:00
2010-08-19 14:13:54 +02:00
/ * *
* simplified call to execute a single entry in the api database table
* @param pk the primary key of the entry
* @param host the host where the api shall be called
* @param port the port on the host
* @return the http status code of the api call or - 1 if any other IOException occurred
* /
2014-01-07 21:26:50 +01:00
public int execAPICall ( String pk , String host , int port , final String username , final String pass ) {
2010-08-19 14:13:54 +02:00
ArrayList < String > pks = new ArrayList < String > ( ) ;
pks . add ( pk ) ;
2014-01-07 21:26:50 +01:00
Map < String , Integer > m = execAPICalls ( host , port , pks , username , pass ) ;
2010-08-19 14:13:54 +02:00
if ( m . isEmpty ( ) ) return - 1 ;
return m . values ( ) . iterator ( ) . next ( ) . intValue ( ) ;
}
2012-12-22 16:27:14 +01:00
/** Milliseconds per hour. */
static final long hour = 60L * 60L * 1000L;
/** Milliseconds per day. */
static final long day = 24L * hour;
2021-09-16 22:23:51 +02:00
2010-08-19 14:13:54 +02:00
/ * *
* calculate the execution time in a api call table based on given scheduling time and last execution time
* @param row the database row in the api table
2012-12-22 16:27:14 +01:00
* @param update if true then the next execution time is based on the latest computed execution time ; otherwise it is based on the last execution time
2010-08-19 14:13:54 +02:00
* /
2010-08-20 01:52:38 +02:00
public static void calculateAPIScheduler ( Tables . Data row , boolean update ) {
2010-08-26 18:42:00 +02:00
Date date = row . containsKey ( WorkTables . TABLE_API_COL_DATE ) ? row . get ( WorkTables . TABLE_API_COL_DATE , ( Date ) null ) : null ;
2010-08-19 14:13:54 +02:00
date = update ? row . get ( WorkTables . TABLE_API_COL_DATE_NEXT_EXEC , date ) : row . get ( WorkTables . TABLE_API_COL_DATE_LAST_EXEC , date ) ;
2012-12-22 21:16:22 +01:00
if ( date = = null ) return ;
2016-08-09 03:03:04 +02:00
long d = 0 ;
2021-09-16 22:23:51 +02:00
2012-12-22 16:27:14 +01:00
final String kind = row . get ( WorkTables . TABLE_API_COL_APICALL_EVENT_KIND , " off " ) ;
if ( " off " . equals ( kind ) ) {
2016-08-09 03:03:04 +02:00
int time = row . get ( WorkTables . TABLE_API_COL_APICALL_SCHEDULE_TIME , - 1 ) ;
if ( time < = 0 ) { // no schedule time
2012-12-22 16:27:14 +01:00
row . put ( WorkTables . TABLE_API_COL_DATE_NEXT_EXEC , " " ) ;
return ;
}
String unit = row . get ( WorkTables . TABLE_API_COL_APICALL_SCHEDULE_UNIT , " days " ) ;
2016-08-09 03:03:04 +02:00
if ( unit . equals ( " minutes " ) ) d = 60000L * Math . max ( 10 , time ) ;
if ( unit . equals ( " hours " ) ) d = hour * time ;
if ( unit . equals ( " days " ) ) d = day * time ;
if ( ( d + date . getTime ( ) ) < System . currentTimeMillis ( ) ) { // missed schedule
d + = System . currentTimeMillis ( ) ; // advance next exec from now
} else {
d + = date . getTime ( ) ; // advance next exec from last execution
}
2012-12-22 16:27:14 +01:00
d - = d % 60000 ; // remove seconds
} else {
String action = row . get ( WorkTables . TABLE_API_COL_APICALL_EVENT_ACTION , " startup " ) ;
if ( ! " startup " . equals ( action ) ) try {
SimpleDateFormat dateFormat = new SimpleDateFormat ( " yyyyMMddHHmm " ) ;
d = dateFormat . parse ( dateFormat . format ( new Date ( ) ) . substring ( 0 , 8 ) + action ) . getTime ( ) ;
if ( d < System . currentTimeMillis ( ) ) d + = day ;
2013-07-17 18:31:30 +02:00
} catch ( final ParseException e ) { } else {
2012-12-22 21:16:22 +01:00
row . put ( WorkTables . TABLE_API_COL_DATE_NEXT_EXEC , " " ) ;
return ;
}
2012-12-22 16:27:14 +01:00
}
2010-08-19 14:13:54 +02:00
row . put ( WorkTables . TABLE_API_COL_DATE_NEXT_EXEC , new Date ( d ) ) ;
}
2012-07-27 12:13:53 +02:00
2015-08-03 05:37:34 +02:00
public void failURLsRegisterMissingWord ( IndexCell < WordReference > indexCell , final DigestURL url , HandleSet queryHashes ) {
2010-12-06 15:34:58 +01:00
// remove words from index
2012-08-31 14:35:56 +02:00
if ( indexCell ! = null ) {
for ( final byte [ ] word : queryHashes ) {
indexCell . removeDelayed ( word , url . hash ( ) ) ;
}
2010-12-06 15:34:58 +01:00
}
2011-01-17 16:04:00 +01:00
}
2012-07-27 12:13:53 +02:00
2011-01-03 21:52:54 +01:00
public static Map < byte [ ] , String > commentCache ( Switchboard sb ) {
Map < byte [ ] , String > comments = new TreeMap < byte [ ] , String > ( Base64Order . enhancedCoder ) ;
Iterator < Tables . Row > i ;
try {
i = sb . tables . iterator ( WorkTables . TABLE_API_NAME ) ;
Tables . Row row ;
while ( i . hasNext ( ) ) {
row = i . next ( ) ;
2011-03-07 21:36:40 +01:00
comments . put ( row . getPK ( ) , UTF8 . String ( row . get ( WorkTables . TABLE_API_COL_COMMENT ) ) ) ;
2011-01-03 21:52:54 +01:00
}
2013-07-17 18:31:30 +02:00
} catch ( final IOException e ) {
2013-07-09 14:28:25 +02:00
ConcurrentLog . logException ( e ) ;
2011-01-03 21:52:54 +01:00
}
return comments ;
}
2010-02-04 12:26:23 +01:00
}