yacy_search_server/source/de/anomic/data/WorkTables.java
orbiter 5a994c9796 added a scheduler based on API actions
- every process that is monitored with the API Steering interface can now be scheduled!
- added input methods in Steering interface to set a scheduling time
- added a view on the steering api that shows only crawl jobs inside the Crawl Profile servlet
- added a scheduling step to the cleanup process handler that triggers the scheduled processes
This means that the cleanup job now also looks for scheduled processes. Such processes are therefore not executed exactly at their
target execution time, but within the time window of the cleanup process (a rough sketch of this dispatch step follows below).

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7050 6c8d7289-2bf4-0310-a012-ef5d649a1542
2010-08-19 12:13:54 +00:00
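
The dispatch step added to the cleanup handler can be pictured roughly as follows. This is a minimal sketch, not the actual Switchboard code: the dispatchScheduledAPICalls name, the table iterator and the host/port/realm values are assumptions made for illustration, while the column names and execAPICall come from the WorkTables class below (imports as in that class, plus java.util.Iterator).

    // sketch: called periodically from the cleanup job to trigger all api calls that are due
    void dispatchScheduledAPICalls(final WorkTables tables, final String host, final int port, final String realm) throws IOException {
        final ArrayList<String> due = new ArrayList<String>();
        final Iterator<Tables.Row> i = tables.iterator(WorkTables.TABLE_API_NAME); // assumed accessor over the api table
        while (i.hasNext()) {
            final Tables.Row row = i.next();
            if (!row.containsKey(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC)) continue; // not scheduled at all
            final Date next = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date());
            if (next.getTime() > System.currentTimeMillis()) continue; // scheduled, but not yet due
            due.add(new String(row.getPK()));
        }
        // execute all due calls; the http status code of each call is returned per url
        final Map<String, Integer> status = tables.execAPICall(due, host, port, realm);
        Log.logInfo("CLEANUP", "triggered " + status.size() + " scheduled API calls");
    }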


// WorkTables.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.02.2010 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 6539 $
// $LastChangedBy: low012 $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.data;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;

import net.yacy.cora.protocol.Client;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.DateFormatter;

import de.anomic.server.serverObjects;

public class WorkTables extends Tables {

    public final static String TABLE_API_NAME = "api";
    public final static String TABLE_API_TYPE_STEERING = "steering";
    public final static String TABLE_API_TYPE_CONFIGURATION = "configuration";
    public final static String TABLE_API_TYPE_CRAWLER = "crawler";
    public final static String TABLE_API_COL_TYPE = "type";
    public final static String TABLE_API_COL_COMMENT = "comment";
    public final static String TABLE_API_COL_DATE_RECORDING = "date_recording"; // if not present default to old date field
    public final static String TABLE_API_COL_DATE_LAST_EXEC = "date_last_exec"; // if not present default to old date field
    public final static String TABLE_API_COL_DATE_NEXT_EXEC = "date_next_exec"; // if not present default to zero
    public final static String TABLE_API_COL_DATE = "date"; // old date; do not set in new records
    public final static String TABLE_API_COL_URL = "url";
    public final static String TABLE_API_COL_APICALL_PK = "apicall_pk"; // the primary key for the table entry of that api call (not really a database field, only a name in the apicall)
    public final static String TABLE_API_COL_APICALL_COUNT = "apicall_count"; // counts how often the API was called (starts with 1)
    public final static String TABLE_API_COL_APICALL_SCHEDULE_TIME = "apicall_schedule_time"; // factor for SCHEDULE_UNIT time units
    public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT = "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days'
    public final static String TABLE_ROBOTS_NAME = "robots";

    public WorkTables(final File workPath) {
        super(workPath, 12);
    }

    /**
     * recording of an API call; stores the call parameters into the API database table
     * @param post the post arguments of the api call
     * @param servletName the name of the servlet
     * @param type name of the servlet category
     * @param comment visual description of the process
     */
    public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) {
        // remove the apicall attributes from the post object
        String pk = post.remove(TABLE_API_COL_APICALL_PK);
        String count = post.remove(TABLE_API_COL_APICALL_COUNT);
        if (count == null) count = "1";
        String time = post.remove(TABLE_API_COL_APICALL_SCHEDULE_TIME);
        String unit = post.remove(TABLE_API_COL_APICALL_SCHEDULE_UNIT);
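        // accept the schedule only if both a time value and a known unit are given; otherwise clear both fields so the call is not scheduled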
        if (time == null || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) {
            time = ""; unit = "";
        }
        // generate the apicall url - without the apicall attributes
        final String apiurl = /*"http://localhost:" + getConfig("port", "8080") +*/ "/" + servletName + "?" + post.toString();
        // read old entry from the apicall table (if exists)
        Row row = null;
        try {
            row = (pk == null) ? null : super.select(TABLE_API_NAME, pk.getBytes());
        } catch (IOException e) {
            Log.logException(e);
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
        }
        // insert or update entry
        try {
            if (row != null) {
                // modify and update existing entry
                // modify date attributes and patch old values
                row.put(TABLE_API_COL_DATE_LAST_EXEC, DateFormatter.formatShortMilliSecond(new Date()).getBytes());
                if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE));
                row.remove(TABLE_API_COL_DATE);
                // insert APICALL attributes
                row.put(TABLE_API_COL_APICALL_COUNT, count.getBytes());
                row.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes());
                row.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
                super.update(TABLE_API_NAME, row);
            } else {
                // create and insert new entry
                Data data = new Data();
                data.put(TABLE_API_COL_TYPE, type.getBytes());
                data.put(TABLE_API_COL_COMMENT, comment.getBytes());
                byte[] date = DateFormatter.formatShortMilliSecond(new Date()).getBytes();
                data.put(TABLE_API_COL_DATE_RECORDING, date);
                data.put(TABLE_API_COL_DATE_LAST_EXEC, date);
                data.put(TABLE_API_COL_URL, apiurl.getBytes());
                // insert APICALL attributes
                data.put(TABLE_API_COL_APICALL_COUNT, count.getBytes());
                data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes());
                data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
                super.insert(TABLE_API_NAME, data);
            }
        } catch (IOException e) {
            Log.logException(e);
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
        }
        Log.logInfo("APICALL", apiurl);
    }

    /**
     * execute an API call using an api table row which contains all essentials;
     * to access the server, the host, port and the authentication realm must also be given
     * @param pks a collection of primary keys denoting the rows in the api table
     * @param host the host where the api shall be called
     * @param port the port on the host
     * @param realm authentication realm
     * @return a map of the called urls and the http status code of each api call, or -1 if an IOException occurred
     */
    public Map<String, Integer> execAPICall(Collection<String> pks, String host, int port, String realm) {
        // now call the api URLs and store the result status
        final Client client = new Client();
        client.setRealm(realm);
        client.setTimout(120000);
        LinkedHashMap<String, Integer> l = new LinkedHashMap<String, Integer>();
        for (String pk: pks) {
            Tables.Row row = null;
            try {
                row = select(WorkTables.TABLE_API_NAME, pk.getBytes());
            } catch (IOException e) {
                Log.logException(e);
            } catch (RowSpaceExceededException e) {
                Log.logException(e);
            }
            if (row == null) continue;
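            // compose the target url from the stored servlet call and append the bookkeeping parameters
            // (primary key, incremented call counter, schedule settings) so the called servlet can update this table entry via recordAPICall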
            String url = "http://" + host + ":" + port + new String(row.get(WorkTables.TABLE_API_COL_URL));
            url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + new String(row.getPK());
            url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1);
            url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, "");
            url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "");
            try {
                client.GETbytes(url);
                l.put(url, client.getStatusCode());
            } catch (IOException e) {
                Log.logException(e);
                l.put(url, -1);
            }
        }
        return l;
    }

    /**
     * simplified call to execute a single entry in the api database table
     * @param pk the primary key of the entry
     * @param host the host where the api shall be called
     * @param port the port on the host
     * @param realm authentication realm
     * @return the http status code of the api call, or -1 if an IOException occurred
     */
    public int execAPICall(String pk, String host, int port, String realm) {
        ArrayList<String> pks = new ArrayList<String>();
        pks.add(pk);
        Map<String, Integer> m = execAPICall(pks, host, port, realm);
        if (m.isEmpty()) return -1;
        return m.values().iterator().next().intValue();
    }

    /**
     * calculate the next execution time in an api call table row, based on the given scheduling time and the last execution time
     * @param row the database row in the api table
     * @param update if true, the next execution date is derived from the previously planned execution date instead of the last execution date
     */
    public static void calculateAPIScheduler(Tables.Row row, boolean update) {
        Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, new Date()) : null;
        date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
        int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
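        // a schedule time of zero or less means the call is not scheduled; remove any planned execution date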
        if (time <= 0) {
            row.remove(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC);
            return;
        }
        String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
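        // advance the base date by the configured interval; if the result lies in the past, schedule the call to run 10 minutes from now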
        long d = date.getTime();
        if (unit.equals("minutes")) d += 60000L * time;
        if (unit.equals("hours")) d += 60000L * 60L * time;
        if (unit.equals("days")) d += 60000L * 60L * 24L * time;
        if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L;
        row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d));
    }
}
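
For reference, a hedged sketch of how a caller (for example a servlet handling the Steering scheduling form) might store a new schedule for an existing api table entry and recompute its next execution date. The sb.tables handle and the pk value are assumed here, and the IOException/RowSpaceExceededException handling shown in the methods above is omitted:

    Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); // pk: primary key of the entry, assumed given
    row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, "7".getBytes());
    row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days".getBytes());
    WorkTables.calculateAPIScheduler(row, false); // derive date_next_exec from date_last_exec plus 7 days
    sb.tables.update(WorkTables.TABLE_API_NAME, row);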