extended the Scheduler: introduced scheduled events

- an event type (once, regular) can be selected
- for this event type, a fixed time can be selected. This may be either
directly after startup or at one of the full hours of a day (== 25
options)
The main point of this feature is the opportunity to start an action
directly after startup. That makes it possible to create YaCy
distributions which, when started for the first time, begin to index
parts of the intranet/internet by themselves.
This commit is contained in:
Michael Peter Christen 2012-12-22 16:27:14 +01:00
parent 433143ba40
commit 24c9bb35f7
8 changed files with 284 additions and 150 deletions

View File

@ -21,8 +21,8 @@
<script type="text/javascript">
// Submits the "apilist" form after a scheduler or event control of one table row was changed.
// 'from' is the primary key of that row: it becomes the URL fragment of the form action
// (so the page jumps back to the row) and is written into the hidden pk fields that the
// servlet reads to find the row to update.
function submitchange(from) {
document.getElementById("apilist").action = "Table_API_p.html#" + from;
// mark this submit as a schedule/event change and remember the affected row
document.getElementById ("scheduleevent").value = "true";
document.getElementById("current_schedule_pk").value = from;
document.getElementById ("scheduleeventaction").value = "true";
document.getElementById("current_pk").value = from;
document.getElementById("apilist").submit();
}
</script>
@ -78,6 +78,7 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<th>Recording<br/>Date</th>
<th>Last&nbsp;Exec<br/>Date</th>
<th>Next&nbsp;Exec<br/>Date</th>
<th class="sorttable_nosort">Event Trigger</th>
<th class="sorttable_nosort">Scheduler</th>
#(inline)#<th class="sorttable_nosort">URL</th>::#(/inline)#
</tr>
@ -91,8 +92,54 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
<td>#[dateLastExec]#</td>
<td>#[dateNextExec]#</td>
<td>
#(event)#
<select name="event_select_#[pk]#" onchange='submitchange("#[pk]#")'>
<option value="off" selected="selected">no event</option>
<option value="on">activate event</option>
</select>
::
<table>
<tr><td>
<select name="event_kind_#[pk]#" onchange='submitchange("#[pk]#")'>
<option value="off" #(selectedoff)#::selected="selected"#(/selectedoff)#>off</option>
<option value="once" #(selectedonce)#::selected="selected"#(/selectedonce)#>run once</option>
<option value="regular" #(selectedregular)#::selected="selected"#(/selectedregular)#>run regular</option>
</select>
</td><td>
<select name="event_action_#[pk]#" onchange='submitchange("#[pk]#")'>
<option value="startup" #(selectedstartup)#::selected="selected"#(/selectedstartup)#>after start-up</option>
<option value="0000" #(selected0000)#::selected="selected"#(/selected0000)#>at 00:00h</option>
<option value="0100" #(selected0100)#::selected="selected"#(/selected0100)#>at 01:00h</option>
<option value="0200" #(selected0200)#::selected="selected"#(/selected0200)#>at 02:00h</option>
<option value="0300" #(selected0300)#::selected="selected"#(/selected0300)#>at 03:00h</option>
<option value="0400" #(selected0400)#::selected="selected"#(/selected0400)#>at 04:00h</option>
<option value="0500" #(selected0500)#::selected="selected"#(/selected0500)#>at 05:00h</option>
<option value="0600" #(selected0600)#::selected="selected"#(/selected0600)#>at 06:00h</option>
<option value="0700" #(selected0700)#::selected="selected"#(/selected0700)#>at 07:00h</option>
<option value="0800" #(selected0800)#::selected="selected"#(/selected0800)#>at 08:00h</option>
<option value="0900" #(selected0900)#::selected="selected"#(/selected0900)#>at 09:00h</option>
<option value="1000" #(selected1000)#::selected="selected"#(/selected1000)#>at 10:00h</option>
<option value="1100" #(selected1100)#::selected="selected"#(/selected1100)#>at 11:00h</option>
<option value="1200" #(selected1200)#::selected="selected"#(/selected1200)#>at 12:00h</option>
<option value="1300" #(selected1300)#::selected="selected"#(/selected1300)#>at 13:00h</option>
<option value="1400" #(selected1400)#::selected="selected"#(/selected1400)#>at 14:00h</option>
<option value="1500" #(selected1500)#::selected="selected"#(/selected1500)#>at 15:00h</option>
<option value="1600" #(selected1600)#::selected="selected"#(/selected1600)#>at 16:00h</option>
<option value="1700" #(selected1700)#::selected="selected"#(/selected1700)#>at 17:00h</option>
<option value="1800" #(selected1800)#::selected="selected"#(/selected1800)#>at 18:00h</option>
<option value="1900" #(selected1900)#::selected="selected"#(/selected1900)#>at 19:00h</option>
<option value="2000" #(selected2000)#::selected="selected"#(/selected2000)#>at 20:00h</option>
<option value="2100" #(selected2100)#::selected="selected"#(/selected2100)#>at 21:00h</option>
<option value="2200" #(selected2200)#::selected="selected"#(/selected2200)#>at 22:00h</option>
<option value="2300" #(selected2300)#::selected="selected"#(/selected2300)#>at 23:00h</option>
</select>
</td></tr>
</table>
#(/event)#
</td>
<td>
#(scheduler)#
<select name="repeat_select_#[pk]#" onchange='submitchange("#[pk]#")'>
<select name="repeat_select_#[pk]#" onchange='submitchange("#[pk]#")'#(disabled)#:: disabled="disabled"#(/disabled)#>
<option value="off" selected="selected">no repetition</option>
<option value="on">activate scheduler</option>
</select>
@ -121,8 +168,8 @@ To see a list of all APIs, please visit the <a href="http://www.yacy-websuche.de
</div>
</fieldset>
<p>
<input type="hidden" name="scheduleevent" id="scheduleevent" value="false" />
<input type="hidden" name="current_schedule_pk" id="current_schedule_pk" value="" />
<input type="hidden" name="scheduleeventaction" id="scheduleeventaction" value="false" />
<input type="hidden" name="current_pk" id="current_pk" value="" />
<input type="hidden" name="num" value="#[num]#" />
<input type="submit" name="execrows" value="Execute Selected Actions" />
<input type="submit" name="deleterows" value="Delete Selected Actions" />

View File

@ -31,7 +31,6 @@ import java.util.regex.Pattern;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.WorkTables;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.logging.Log;
@ -73,58 +72,50 @@ public class Table_API_p {
typefilter = Pattern.compile(post.get("filter", ".*"));
}
boolean scheduleevent = false; // flag if schedule info of row changes
String current_schedule_pk = ""; // pk of changed schedule data row
if (post != null && post.containsKey("scheduleevent")) {
scheduleevent = post.get("scheduleevent", "false").equalsIgnoreCase("true");
prop.put("scheduleevent", "false");
current_schedule_pk = post.get("current_schedule_pk", "");
// process scheduler and event input actions
boolean scheduleeventaction = false; // flag if schedule info of row changes
String current_pk = ""; // pk of changed schedule data row
if (post != null && post.containsKey("scheduleeventaction")) {
scheduleeventaction = post.get("scheduleeventaction", "false").equalsIgnoreCase("true");
prop.put("scheduleeventaction", "false");
current_pk = post.get("current_pk", "");
}
if (scheduleevent && !current_schedule_pk.isEmpty()) {
if (post != null && post.containsKey("repeat_select_" + current_schedule_pk) ) {
try {
final String action = post.get("repeat_select_" + current_schedule_pk, "off");
if (action.equals("on")) {
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes());
if (row != null) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
if (post != null && scheduleeventaction && !current_pk.isEmpty()) {
try {
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_pk.getBytes());
if (row != null) {
String action;
// events
if (post.containsKey("event_select_" + current_pk) && post.get("event_select_" + current_pk, "off").equals("on")) {
row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "regular");
row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup");
}
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
if (post != null && post.containsKey("repeat_time_" + current_schedule_pk) ) {
try {
final String action = post.get("repeat_time_" + current_schedule_pk, "off");
final Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes());
if (row != null) {
if ("off".equals(action)) {
if (post.containsKey("event_kind_" + current_pk) ) {
row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, post.get("event_kind_" + current_pk, "off"));
}
if (post.containsKey("event_action_" + current_pk) ) {
row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, post.get("event_action_" + current_pk, "startup"));
}
// scheduler
if (post.containsKey("repeat_select_" + current_pk) && post.get("repeat_select_" + current_pk, "off").equals("on")) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
}
if (post.containsKey("repeat_time_" + current_pk) ) {
if ("off".equals(action = post.get("repeat_time_" + current_pk, "off"))) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
} else {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.parseInt(action));
}
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
}
}
if (post != null && post.containsKey("repeat_unit_" + current_schedule_pk) ) {
try {
final String action = post.get("repeat_unit_" + current_schedule_pk, "seldays");
final Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, current_schedule_pk.getBytes());
if (row != null) {
if (post.containsKey("repeat_unit_" + current_pk) ) {
action = post.get("repeat_unit_" + current_pk, "seldays");
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, action.substring(3));
if (action.equals("selminutes") && time > 0 && time < 10) {
@ -139,16 +130,18 @@ public class Table_API_p {
if (action.equals("seldays") && time > 30) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 30);
}
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
} catch (IOException e) {
Log.logException(e);
} catch (SpaceExceededException e) {
Log.logException(e);
// switch scheduler off if event kind is 'regular'
final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
if ("regular".equals(kind)) row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);
}
}
} catch (Throwable e) { Log.logException(e); }
}
if (post != null && !post.get("deleterows", "").isEmpty()) {
for (final Map.Entry<String, String> entry : post.entrySet()) {
if (entry.getValue().startsWith("mark_")) {
@ -241,8 +234,6 @@ public class Table_API_p {
final Date date_last_exec = row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
final int callcount = row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1);
final String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
final int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
prop.put("showtable_list_" + count + "_inline", inline ? 1 : 0);
prop.put("showtable_list_" + count + "_dark", dark ? 1 : 0);
dark = !dark;
@ -252,21 +243,50 @@ public class Table_API_p {
prop.put("showtable_list_" + count + "_dateRecording", date_recording == null ? "-" : DateFormat.getDateTimeInstance().format(date_recording));
prop.put("showtable_list_" + count + "_dateLastExec", date_last_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_last_exec));
prop.put("showtable_list_" + count + "_dateNextExec", date_next_exec == null ? "-" : DateFormat.getDateTimeInstance().format(date_next_exec));
prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedDays", (unit.isEmpty() || unit.equals("days")) ? 1 : 0);
prop.put("showtable_list_" + count + "_repeatTime", time);
prop.put("showtable_list_" + count + "_type", row.get(WorkTables.TABLE_API_COL_TYPE));
prop.put("showtable_list_" + count + "_comment", row.get(WorkTables.TABLE_API_COL_COMMENT));
prop.putHTML("showtable_list_" + count + "_inline_url", "http://" + sb.myPublicIP() + ":" + sb.getConfig("port", "8090") + UTF8.String(row.get(WorkTables.TABLE_API_COL_URL)));
prop.put("showtable_list_" + count + "_scheduler_inline", inline ? "true" : "false");
prop.put("showtable_list_" + count + "_scheduler_filter", typefilter.pattern());
prop.put("showtable_list_" + count + "_scheduler_query", query.pattern());
prop.put("showtable_list_" + count + "_scheduler_startRecord", startRecord);
prop.put("showtable_list_" + count + "_scheduler_maximumRecords", maximumRecords);
// events
final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
final String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup");
prop.put("showtable_list_" + count + "_event_pk", UTF8.String(row.getPK()));
boolean schedulerDisabled = "regular".equals(kind);
if ("off".equals(kind)) {
prop.put("showtable_list_" + count + "_event", 0);
} else {
prop.put("showtable_list_" + count + "_event", 1);
prop.put("showtable_list_" + count + "_event_selectedoff", "off".equals(kind) ? 1 : 0);
prop.put("showtable_list_" + count + "_event_selectedonce", "once".equals(kind) ? 1 : 0);
prop.put("showtable_list_" + count + "_event_selectedregular", "regular".equals(kind) ? 1 : 0);
prop.put("showtable_list_" + count + "_event_selectedstartup", "startup".equals(action) ? 1 : 0);
for (int i = 0; i < 24; i++) {
String is = Integer.toString(i);
if (is.length() == 1) is = "0" + is;
is = is + "00";
prop.put("showtable_list_" + count + "_event_selected" + is, is.equals(action) ? 1 : 0);
}
}
// scheduler
final String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
final int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
prop.put("showtable_list_" + count + "_selectedMinutes", unit.equals("minutes") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedHours", unit.equals("hours") ? 1 : 0);
prop.put("showtable_list_" + count + "_selectedDays", (unit.isEmpty() || unit.equals("days")) ? 1 : 0);
prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK()));
prop.put("showtable_list_" + count + "_scheduler_disabled", schedulerDisabled ? 1 : 0);
prop.put("showtable_list_" + count + "_repeatTime", time);
if (time == 0) {
prop.put("showtable_list_" + count + "_scheduler", 0);
prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK()));
} else {
scheduledactions = true;
prop.put("showtable_list_" + count + "_scheduler", 1);
prop.put("showtable_list_" + count + "_scheduler_pk", UTF8.String(row.getPK()));
prop.put("showtable_list_" + count + "_scheduler_scale_" + 0 + "_time", "off");
prop.put("showtable_list_" + count + "_scheduler_selectedMinutes", 0);
prop.put("showtable_list_" + count + "_scheduler_selectedHours", 0);
@ -296,12 +316,8 @@ public class Table_API_p {
prop.put("showtable_list_" + count + "_scheduler_scale", 31);
prop.put("showtable_list_" + count + "_scheduler_selectedDays", 1);
}
}
prop.put("showtable_list_" + count + "_scheduler_inline", inline ? "true" : "false");
prop.put("showtable_list_" + count + "_scheduler_filter", typefilter.pattern());
prop.put("showtable_list_" + count + "_scheduler_query", query.pattern());
prop.put("showtable_list_" + count + "_scheduler_startRecord", startRecord);
prop.put("showtable_list_" + count + "_scheduler_maximumRecords", maximumRecords);
count++;
}
if (scheduledactions) {

View File

@ -138,7 +138,9 @@ public class GenericFormatter extends AbstractFormatter implements DateFormatter
if (timeString == null || timeString.isEmpty()) { return new Date(); }
if (UTCOffset == null || UTCOffset.isEmpty()) { return new Date(); }
try {
return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset));
synchronized (this.dateFormat) {
return new Date(this.dateFormat.parse(timeString).getTime() - UTCDiff() + UTCDiff(UTCOffset));
}
} catch (final Throwable e) {
//serverLog.logFinest("parseUniversalDate", e.getMessage() + ", remoteTimeString=[" + remoteTimeString + "]");
return new Date();

View File

@ -28,11 +28,14 @@ package net.yacy.data;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
@ -70,7 +73,9 @@ public class WorkTables extends Tables {
public final static String TABLE_API_COL_APICALL_PK = "apicall_pk"; // the primary key for the table entry of that api call (not really a database field, only a name in the apicall)
public final static String TABLE_API_COL_APICALL_COUNT = "apicall_count"; // counts how often the API was called (starts with 1)
public final static String TABLE_API_COL_APICALL_SCHEDULE_TIME = "apicall_schedule_time"; // factor for SCHEDULE_UNIT time units; a value of 0 means that no repetition is scheduled
public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT= "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days'
public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT = "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days'
public final static String TABLE_API_COL_APICALL_EVENT_KIND = "apicall_event_kind"; // kind of the event trigger: may be 'off', 'once', 'regular'
public final static String TABLE_API_COL_APICALL_EVENT_ACTION = "apicall_event_action"; // time of the event trigger: 'startup' or a full hour of the day written as 'HHmm' ('0000' .. '2300')
public final static String TABLE_ROBOTS_NAME = "robots";
@ -277,26 +282,40 @@ public class WorkTables extends Tables {
return m.values().iterator().next().intValue();
}
// milliseconds per hour
final static long hour = 1000L * 60L * 60L;
// milliseconds per day
final static long day = hour * 24L;
/**
* calculate the execution time in a api call table based on given scheduling time and last execution time;
* the result is written to the TABLE_API_COL_DATE_NEXT_EXEC column of the given row
* @param row the database row in the api table
* @param update if true then the next execution time is based on the latest computed execution time; otherwise it is based on the last execution time
*/
public static void calculateAPIScheduler(Tables.Data row, boolean update) {
// base date: the TABLE_API_COL_DATE value of the row (null if the column is absent), replaced by the
// next-execution date (update == true) or the last-execution date (update == false) if present
Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, (Date) null) : null;
date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
// a schedule time of zero or less means "no repetition": clear the next execution date
if (time <= 0) {
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, "");
return;
}
String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
// NOTE(review): date can still be null here if none of the date columns is set - confirm that callers guarantee one
long d = date.getTime();
// add the repetition interval; minute intervals are never shorter than 10 minutes
if (unit.equals("minutes")) d += 60000L * Math.max(10, time);
if (unit.equals("hours")) d += 60000L * 60L * time;
if (unit.equals("days")) d += 60000L * 60L * 24L * time;
// a computed time that already lies in the past is replaced by "ten minutes from now"
if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L;
d -= d % 60000; // remove seconds
final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
if ("off".equals(kind)) {
// no event trigger: the next execution time follows the repetition schedule
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
// a schedule time of zero or less means "no repetition": clear the next execution date
if (time <= 0) {
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, "");
return;
}
String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
// add the repetition interval; minute intervals are never shorter than 10 minutes
if (unit.equals("minutes")) d += 60000L * Math.max(10, time);
if (unit.equals("hours")) d += 60000L * 60L * time;
if (unit.equals("days")) d += 60000L * 60L * 24L * time;
// a computed time that already lies in the past is replaced by "ten minutes from now"
if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L;
d -= d % 60000; // remove seconds
} else {
// event trigger: for a clock-time action ('HHmm') the next execution is that time of the
// current day, or of the following day if that time has already passed
// NOTE(review): for action 'startup' d stays at the base date computed above - confirm this is intended
String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup");
if (!"startup".equals(action)) try {
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
// current date as 'yyyyMMdd' followed by the action string as 'HHmm'
d = dateFormat.parse(dateFormat.format(new Date()).substring(0, 8) + action).getTime();
if (d < System.currentTimeMillis()) d += day;
} catch (ParseException e) {} // an action that cannot be parsed leaves d unchanged
}
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d));
}

View File

@ -58,6 +58,16 @@ import org.apache.solr.common.SolrInputDocument;
*/
public class URIMetadataNode {
// List of YaCySchema fields, in alphabetical order; every field is listed exactly once.
// NOTE(review): presumably the set of index fields needed to build a URIMetadataNode -
// confirm against the usages of this array.
public static YaCySchema[] fieldList = new YaCySchema[]{
    YaCySchema.audiolinkscount_i, YaCySchema.author, YaCySchema.collection_sxt, YaCySchema.content_type,
    YaCySchema.coordinate_p, YaCySchema.description, YaCySchema.fresh_date_dt, YaCySchema.host_id_s, YaCySchema.id,
    YaCySchema.imagescount_i, YaCySchema.inboundlinks_protocol_sxt, YaCySchema.inboundlinks_urlstub_txt,
    YaCySchema.inboundlinkscount_i, YaCySchema.keywords, YaCySchema.language_s, YaCySchema.last_modified, YaCySchema.load_date_dt,
    YaCySchema.md5_s, YaCySchema.outboundlinks_protocol_sxt, YaCySchema.outboundlinks_urlstub_txt,
    YaCySchema.outboundlinkscount_i, YaCySchema.publisher_t, YaCySchema.referrer_id_txt, YaCySchema.size_i, YaCySchema.sku,
    YaCySchema.text_t, YaCySchema.title, YaCySchema.title_words_val, YaCySchema.url_chars_i,
    YaCySchema.videolinkscount_i, YaCySchema.wordcount_i};
private byte[] hash = null;
private String urlRaw = null, keywords = null;
private DigestURI url = null;

View File

@ -65,12 +65,20 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT
return idlePause;
}
/**
 * Reports the pause that is applied between two jobs when a job returned idle.
 * @return the current idle pause in milliseconds
 */
public final long getIdleSleep() {
    return this.idlePause;
}
/**
 * Sets the pause that is applied between two jobs when a job returned busy.
 * The requested value is forced into the range given by the minimum and the maximum busy sleep.
 * @param milliseconds the requested pause in milliseconds
 * @return the pause that is in effect after the call
 */
public final long setBusySleep(final long milliseconds) {
    // first raise the request to the lower bound, then cap it at the upper bound
    final long notBelowMinimum = Math.max(this.minBusySleep, milliseconds);
    this.busyPause = Math.min(this.maxBusySleep, notBelowMinimum);
    return this.busyPause;
}
/**
 * Reports the pause that is applied between two jobs when a job returned busy.
 * @return the current busy pause in milliseconds
 */
public final long getBusySleep() {
    return this.busyPause;
}
public void setMemPreReqisite(final long freeBytes) {
// sets minimum required amount of memory for the job execution
memprereq = freeBytes;

View File

@ -40,13 +40,25 @@ public interface BusyThread extends WorkflowThread {
*/
public long setIdleSleep(long milliseconds);
/**
* gets the sleep time for pauses between two jobs if the job returns false (idle)
* @return the idle sleep time in milliseconds
*/
public long getIdleSleep();
/**
* sets a sleep time for pauses between two jobs if the job returns true (busy)
* @param milliseconds the requested sleep time in milliseconds
* @return the sleep time in milliseconds that is in effect after the call; an implementation may adjust
*         the requested value (AbstractBusyThread limits it to its minimum and maximum busy sleep)
*/
public long setBusySleep(long milliseconds);
/**
* gets the sleep time for pauses between two jobs if the job returns true (busy)
* @return the busy sleep time in milliseconds
*/
public long getBusySleep();
/**
* sets minimum required amount of memory for the job execution
* @param freeBytes

View File

@ -53,10 +53,14 @@ import java.net.MalformedURLException;
import java.security.NoSuchAlgorithmException;
import java.security.PublicKey;
import java.security.spec.InvalidKeySpecException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@ -273,6 +277,7 @@ public final class Switchboard extends serverSwitch {
private final Semaphore shutdownSync = new Semaphore(0);
private boolean terminate = false;
// true until execAPIActions() has run for the first time; API calls with the event action
// 'startup' are only selected while this flag is set.
// NOTE(review): execAPIActions() is called from the thread started at the end of the
// initialization and from at least one other call site, and this field is read and written there
// without synchronization - confirm whether it has to be volatile.
private boolean startupAction = true; // this is set to false after the first event
private static Switchboard sb;
public HashMap<String, Object[]> crawlJobsStatus = new HashMap<String, Object[]>();
@ -1062,6 +1067,14 @@ public final class Switchboard extends serverSwitch {
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260);
this.trail = new LinkedBlockingQueue<String>();
// finally start jobs which shall be started after start-up
new Thread("Switchboard.startupActions") {
    @Override
    public void run() {
        try {
            Thread.sleep(10000); // we must wait until the httpd comes up
        } catch (final InterruptedException e) {
            // Interrupted while waiting: the httpd cannot be assumed to be reachable yet.
            // Keep the interrupt status and give up; the startupAction flag stays set, so the
            // start-up actions are picked up by the next regular execAPIActions() call.
            Thread.currentThread().interrupt();
            return;
        }
        execAPIActions(); // trigger startup actions
    }
}.start();
this.log.logConfig("Finished Switchboard Initialization");
}
@ -1992,27 +2005,19 @@ public final class Switchboard extends serverSwitch {
insert = true;
}
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ) {
selentry.put(
CrawlProfile.RECRAWL_IF_OLDER,
Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE)));
selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE)));
insert = true;
}
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ) {
selentry.put(
CrawlProfile.RECRAWL_IF_OLDER,
Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE)));
selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE)));
insert = true;
}
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ) {
selentry.put(
CrawlProfile.RECRAWL_IF_OLDER,
Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE)));
selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE)));
insert = true;
}
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ) {
selentry.put(
CrawlProfile.RECRAWL_IF_OLDER,
Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE)));
selentry.put(CrawlProfile.RECRAWL_IF_OLDER, Long.toString(CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE)));
insert = true;
}
if ( selentry.name().equals(CrawlSwitchboard.CRAWL_PROFILE_SURROGATE) ) {
@ -2027,58 +2032,7 @@ public final class Switchboard extends serverSwitch {
Log.logException(e);
}
// execute scheduled API actions
Tables.Row row;
final List<String> pks = new ArrayList<String>();
final Date now = new Date();
try {
final Iterator<Tables.Row> plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME);
final Iterator<Tables.Row> mapIterator =
this.tables
.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING)
.iterator();
while ( mapIterator.hasNext() ) {
row = mapIterator.next();
if ( row == null ) {
continue;
}
final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
if ( date_next_exec == null ) {
continue;
}
if ( date_next_exec.after(now) ) {
continue;
}
pks.add(UTF8.String(row.getPK()));
}
} catch ( final IOException e ) {
Log.logException(e);
}
for ( final String pk : pks ) {
try {
row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
WorkTables.calculateAPIScheduler(row, true); // calculate next update time
this.tables.update(WorkTables.TABLE_API_NAME, row);
} catch ( final IOException e ) {
Log.logException(e);
continue;
} catch ( final SpaceExceededException e ) {
Log.logException(e);
continue;
}
}
final Map<String, Integer> callResult =
this.tables.execAPICalls(
"localhost",
(int) getConfigLong("port", 8090),
getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""),
pks);
for ( final Map.Entry<String, Integer> call : callResult.entrySet() ) {
this.log.logInfo("Scheduler executed api call, response "
+ call.getValue()
+ ": "
+ call.getKey());
}
execAPIActions();
// close unused connections
ConnectionInfo.cleanUp();
@ -2266,6 +2220,72 @@ public final class Switchboard extends serverSwitch {
}
}
/**
 * Selects all recorded API calls that are due and executes them against the local peer.
 * <p>
 * A row of the API table is selected if
 * <ul>
 * <li>its next execution date lies in the past (scheduler), or</li>
 * <li>its event kind is not 'off' and its event action matches: the action 'startup' matches as
 *     long as this method has not completed a run yet; a clock time 'HHmm' matches if that time of
 *     the current day has passed by less than one busy-sleep cycle of the CLEANUP thread.</li>
 * </ul>
 * An event of kind 'once' is switched to 'off' as soon as its row is selected. For every selected
 * row the next execution date is re-calculated and stored before the calls are executed.
 */
private void execAPIActions() {
    // a LinkedHashSet keeps the selection order and drops rows that match scheduler AND event
    final Collection<String> pks = new LinkedHashSet<String>();
    final Date now = new Date();
    // one formatter for the whole scan; it never leaves this call, so no synchronization is needed
    final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
    final String today = dateFormat.format(now).substring(0, 8); // 'yyyyMMdd'
    try {
        final Iterator<Tables.Row> plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME);
        final Iterator<Tables.Row> mapIterator = this.tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator();
        while (mapIterator.hasNext()) {
            final Tables.Row row = mapIterator.next();
            if (row == null) continue;
            final String pk = UTF8.String(row.getPK());

            // select api calls according to scheduler settings
            final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
            if (date_next_exec != null && now.after(date_next_exec)) pks.add(pk);

            // select api calls according to event settings
            final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
            if ("off".equals(kind)) continue;
            final String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup");
            boolean triggered = false;
            if ("startup".equals(action)) {
                triggered = this.startupAction;
            } else {
                try {
                    // the event time of today; it fires if it passed within the last cleanup cycle
                    final long eventTime = dateFormat.parse(today + action).getTime();
                    final long cycle = getThread(SwitchboardConstants.CLEANUP).getBusySleep();
                    final long current = System.currentTimeMillis();
                    triggered = eventTime < current && current - eventTime < cycle;
                } catch (final ParseException e) {
                    // a stored action that is neither 'startup' nor 'HHmm' can never fire; report it
                    Log.logException(e);
                }
            }
            if (!triggered) continue;
            pks.add(pk);
            if ("once".equals(kind)) {
                // a one-time event must not fire again
                row.put(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
                this.tables.update(WorkTables.TABLE_API_NAME, row);
            }
        }
    } catch (final IOException e) {
        Log.logException(e);
    }

    // calculate and store the next execution time of every selected call
    for (final String pk : pks) {
        try {
            final Tables.Row row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
            if (row == null) continue; // the row vanished in the meantime
            WorkTables.calculateAPIScheduler(row, true); // calculate next update time
            this.tables.update(WorkTables.TABLE_API_NAME, row);
        } catch (final Throwable e) {
            Log.logException(e);
        }
    }
    this.startupAction = false;

    // execute api calls
    final Map<String, Integer> callResult = this.tables.execAPICalls("localhost", (int) getConfigLong("port", 8090), getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, ""), pks);
    for (final Map.Entry<String, Integer> call : callResult.entrySet()) {
        this.log.logInfo("Scheduler executed api call, response " + call.getValue() + ": " + call.getKey());
    }
}
/**
* With this function the crawling process can be paused
*