mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merged master to 'heroku' branch.
This commit is contained in:
commit
b9c28893ee
|
@ -80,7 +80,7 @@ public class WorkTables extends Tables {
|
|||
public final static String TABLE_API_COL_APICALL_COUNT = "apicall_count"; // counts how often the API was called (starts with 1)
|
||||
public final static String TABLE_API_COL_APICALL_SCHEDULE_TIME = "apicall_schedule_time"; // factor for SCHEULE_UNIT time units
|
||||
public final static String TABLE_API_COL_APICALL_SCHEDULE_UNIT = "apicall_schedule_unit"; // may be 'minutes', 'hours', 'days'
|
||||
public final static String TABLE_API_COL_APICALL_EVENT_KIND = "apicall_event_kind"; //
|
||||
public final static String TABLE_API_COL_APICALL_EVENT_KIND = "apicall_event_kind"; //
|
||||
public final static String TABLE_API_COL_APICALL_EVENT_ACTION = "apicall_event_action"; //
|
||||
|
||||
public final static String TABLE_ROBOTS_NAME = "robots";
|
||||
|
@ -146,6 +146,7 @@ public class WorkTables extends Tables {
|
|||
|
||||
// insert APICALL attributes
|
||||
row.put(TABLE_API_COL_APICALL_COUNT, row.get(TABLE_API_COL_APICALL_COUNT, 1) + 1);
|
||||
calculateAPIScheduler(row, false); // set next execution time (as this might be a forward existing entry with schedule data)
|
||||
super.update(TABLE_API_NAME, row);
|
||||
assert pk != null;
|
||||
}
|
||||
|
@ -311,20 +312,24 @@ public class WorkTables extends Tables {
|
|||
Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, (Date) null) : null;
|
||||
date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
|
||||
if (date == null) return;
|
||||
long d = date.getTime();
|
||||
long d = 0;
|
||||
|
||||
final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
|
||||
if ("off".equals(kind)) {
|
||||
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
|
||||
if (time <= 0) {
|
||||
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, -1);
|
||||
if (time <= 0) { // no schedule time
|
||||
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, "");
|
||||
return;
|
||||
}
|
||||
String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
|
||||
if (unit.equals("minutes")) d += 60000L * Math.max(10, time);
|
||||
if (unit.equals("hours")) d += 60000L * 60L * time;
|
||||
if (unit.equals("days")) d += 60000L * 60L * 24L * time;
|
||||
if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L;
|
||||
if (unit.equals("minutes")) d = 60000L * Math.max(10, time);
|
||||
if (unit.equals("hours")) d = hour * time;
|
||||
if (unit.equals("days")) d = day * time;
|
||||
if ((d + date.getTime()) < System.currentTimeMillis()) { // missed schedule
|
||||
d += System.currentTimeMillis(); // advance next exec from now
|
||||
} else {
|
||||
d += date.getTime(); // advance next exec from last execution
|
||||
}
|
||||
d -= d % 60000; // remove seconds
|
||||
} else {
|
||||
String action = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_ACTION, "startup");
|
||||
|
|
|
@ -290,7 +290,7 @@ public class YaCyDefaultServlet extends HttpServlet {
|
|||
|
||||
if (!hasClass && (resource == null || !resource.exists()) && !pathInContext.contains("..")) {
|
||||
// try to get this in the alternative htDocsPath
|
||||
resource = Resource.newResource(new File(HTTPDFileHandler.htDocsPath, pathInContext));
|
||||
resource = Resource.newResource(new File(_htDocsPath, pathInContext));
|
||||
}
|
||||
|
||||
if (ConcurrentLog.isFine("FILEHANDLER")) {
|
||||
|
@ -1033,8 +1033,15 @@ public class YaCyDefaultServlet extends HttpServlet {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static String appendPath(String proplist, String path) {
|
||||
|
||||
/**
|
||||
* Appends a path to a comma-separated string of paths if it is not already
|
||||
* contained in the proplist string
|
||||
* @param proplist comma-separated string of paths
|
||||
* @param path path to be appended
|
||||
* @return comma-separated string of paths including the given path
|
||||
*/
|
||||
private String appendPath(String proplist, String path) {
|
||||
if (proplist.length() == 0) return path;
|
||||
if (proplist.contains(path)) return proplist;
|
||||
return proplist + "," + path;
|
||||
|
|
|
@ -201,10 +201,9 @@ public class BEncodedHeap implements MapStore {
|
|||
final Map<String, BDecoder.BObject> map = bobj.getMap();
|
||||
final Map<String, byte[]> m = new HashMap<String, byte[]>();
|
||||
for ( final Map.Entry<String, BDecoder.BObject> entry : map.entrySet() ) {
|
||||
if ( entry.getValue().getType() != BDecoder.BType.string ) {
|
||||
continue;
|
||||
}
|
||||
m.put(entry.getKey(), entry.getValue().getString());
|
||||
BObject ev = entry.getValue();
|
||||
if ( ev == null || ev.getType() != BDecoder.BType.string ) continue;
|
||||
m.put(entry.getKey(), ev.getString());
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
@ -688,11 +687,8 @@ public class BEncodedHeap implements MapStore {
|
|||
m.put("k", "222".getBytes());
|
||||
map.insert("789".getBytes(), m);
|
||||
// iterate over keys
|
||||
Map.Entry<byte[], Map<String, byte[]>> entry;
|
||||
final Iterator<Map.Entry<byte[], Map<String, byte[]>>> i = map.iterator();
|
||||
while ( i.hasNext() ) {
|
||||
entry = i.next();
|
||||
System.out.println(ASCII.String(entry.getKey()) + ": " + entry.getValue());
|
||||
for (Map.Entry<byte[], Map<String, byte[]>> entry : map) {
|
||||
System.out.println(ASCII.String(entry.getKey()) + ": " + ASCII.String(entry.getValue().values().iterator().next()));
|
||||
}
|
||||
// clean up
|
||||
map.close();
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.io.IOException;
|
|||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -256,17 +257,27 @@ public class Blacklist {
|
|||
loadList(blFile, sep);
|
||||
}
|
||||
|
||||
public final void removeAll(final BlacklistType blacklistType, final String host) {
|
||||
getBlacklistMap(blacklistType, true).remove(host);
|
||||
getBlacklistMap(blacklistType, false).remove(host);
|
||||
}
|
||||
|
||||
/**
|
||||
* remove the host/path from internal blacklist maps for given blacklistType
|
||||
* !! and removes the entry from source blacklist file !!
|
||||
* @param blacklistType
|
||||
* @param blacklistToUse
|
||||
* @param host
|
||||
* @param path
|
||||
*/
|
||||
public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
|
||||
|
||||
final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
|
||||
Set<Pattern> hostList = blacklistMap.get(host);
|
||||
if (hostList != null) {
|
||||
hostList.remove(path);
|
||||
// remove pattern from list (by comparing patternstring with path, remove(path) will not match path)
|
||||
for (Pattern hp : hostList) {
|
||||
String hpxs = hp.pattern();
|
||||
if (hpxs.equals(path)) {
|
||||
hostList.remove(hp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hostList.isEmpty()) {
|
||||
blacklistMap.remove(host);
|
||||
}
|
||||
|
@ -275,12 +286,21 @@ public class Blacklist {
|
|||
final Map<String, Set<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
|
||||
hostList = blacklistMapNotMatch.get(host);
|
||||
if (hostList != null) {
|
||||
hostList.remove(path);
|
||||
// remove pattern from list
|
||||
for (Pattern hp : hostList) {
|
||||
String hpxs = hp.pattern();
|
||||
if (hpxs.equals(path)) {
|
||||
hostList.remove(hp);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hostList.isEmpty()) {
|
||||
blacklistMapNotMatch.remove(host);
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: check if delete from blacklist is desired, on reload entry will not be available in any blacklist
|
||||
// even if remove (above) from internal maps (at runtime) is only done for given blacklistType
|
||||
// load blacklist data from file
|
||||
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
|
||||
|
||||
|
@ -297,9 +317,9 @@ public class Blacklist {
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Adds entry to a given blacklist internal data and updates the source file
|
||||
* @param blacklistType
|
||||
* @param blacklistToUse
|
||||
* @param blacklistToUse source file
|
||||
* @param host
|
||||
* @param path
|
||||
* @throws PunycodeException
|
||||
|
@ -362,7 +382,7 @@ public class Blacklist {
|
|||
}
|
||||
|
||||
/**
|
||||
* Appends an entry to the blacklist source file.
|
||||
* Appends an entry to the blacklist source file and updates the internal blacklist maps.
|
||||
*
|
||||
* @param blacklistSourcefile name of the blacklist file (LISTS/*.black)
|
||||
* @param host host or host pattern
|
||||
|
@ -387,8 +407,21 @@ public class Blacklist {
|
|||
|
||||
if (!p.isEmpty() && p.charAt(0) == '*') {
|
||||
p = "." + p;
|
||||
}
|
||||
}
|
||||
Pattern pattern = Pattern.compile(p, Pattern.CASE_INSENSITIVE);
|
||||
|
||||
// update (put) pattern to internal blacklist maps (for which source is active)
|
||||
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
|
||||
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistSourcefile)) {
|
||||
final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(supportedBlacklistType, isMatchable(host));
|
||||
Set<Pattern> hostList;
|
||||
if (!(blacklistMap.containsKey(h) && ((hostList = blacklistMap.get(h)) != null))) {
|
||||
blacklistMap.put(h, (hostList = new HashSet<Pattern>()));
|
||||
}
|
||||
hostList.add(pattern);
|
||||
}
|
||||
}
|
||||
|
||||
// Append the line to the file.
|
||||
PrintWriter pw = null;
|
||||
try {
|
||||
|
@ -433,6 +466,14 @@ public class Blacklist {
|
|||
return s != null && s.has(urlHash);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the blacklist contains the given host and path pattern.
|
||||
* To check if a url matches a blacklist pattern, use isListed()
|
||||
* @param blacklistType
|
||||
* @param host
|
||||
* @param path
|
||||
* @return
|
||||
*/
|
||||
public final boolean contains(final BlacklistType blacklistType, final String host, final String path) {
|
||||
boolean ret = false;
|
||||
|
||||
|
@ -444,7 +485,13 @@ public class Blacklist {
|
|||
|
||||
final Set<Pattern> hostList = blacklistMap.get(h);
|
||||
if (hostList != null) {
|
||||
ret = hostList.contains(path);
|
||||
for (Pattern hp : hostList) {
|
||||
String hpxs = hp.pattern();
|
||||
if (hpxs.equals(path)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
|
|
@ -2190,6 +2190,10 @@ public final class Switchboard extends serverSwitch {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks the scheduled execution time of the scheduled API calls and executes all jobs that are due
|
||||
* @return true if calls have been executed
|
||||
*/
|
||||
public boolean schedulerJob() {
|
||||
|
||||
// execute scheduled API actions
|
||||
|
@ -2198,15 +2202,22 @@ public final class Switchboard extends serverSwitch {
|
|||
final Date now = new Date();
|
||||
try {
|
||||
final Iterator<Tables.Row> plainIterator = this.tables.iterator(WorkTables.TABLE_API_NAME);
|
||||
final Iterator<Tables.Row> mapIterator = Tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_RECORDING).iterator();
|
||||
final Iterator<Tables.Row> mapIterator = Tables.orderBy(plainIterator, -1, WorkTables.TABLE_API_COL_DATE_LAST_EXEC).iterator();
|
||||
while (mapIterator.hasNext()) {
|
||||
row = mapIterator.next();
|
||||
if (row == null) continue;
|
||||
|
||||
// select api calls according to scheduler settings
|
||||
final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
|
||||
if (date_next_exec != null && now.after(date_next_exec)) pks.add(UTF8.String(row.getPK()));
|
||||
|
||||
final int stime = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 0);
|
||||
if (stime > 0) { // has scheduled repeat
|
||||
final Date date_next_exec = row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, (Date) null);
|
||||
if (date_next_exec != null) { // has been executed before
|
||||
if (now.after(date_next_exec)) pks.add(UTF8.String(row.getPK()));
|
||||
} else { // was never executed before
|
||||
pks.add(UTF8.String(row.getPK()));
|
||||
}
|
||||
}
|
||||
|
||||
// select api calls according to event settings
|
||||
final String kind = row.get(WorkTables.TABLE_API_COL_APICALL_EVENT_KIND, "off");
|
||||
if (!"off".equals(kind)) {
|
||||
|
@ -2237,16 +2248,6 @@ public final class Switchboard extends serverSwitch {
|
|||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
}
|
||||
for (final String pk : pks) {
|
||||
try {
|
||||
row = this.tables.select(WorkTables.TABLE_API_NAME, UTF8.getBytes(pk));
|
||||
WorkTables.calculateAPIScheduler(row, true); // calculate next update time
|
||||
this.tables.update(WorkTables.TABLE_API_NAME, row);
|
||||
} catch (final Throwable e ) {
|
||||
ConcurrentLog.logException(e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
startupAction = false;
|
||||
|
||||
// execute api calls
|
||||
|
|
43
test/java/net/yacy/repository/BlacklistTest.java
Normal file
43
test/java/net/yacy/repository/BlacklistTest.java
Normal file
|
@ -0,0 +1,43 @@
|
|||
package net.yacy.repository;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import net.yacy.cora.document.id.Punycode;
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class BlacklistTest {
|
||||
|
||||
/**
|
||||
* Simulates the contains method of class Blacklist to prove that comparing
|
||||
* Pattern.pattern() with the path string is needed and works
|
||||
*/
|
||||
@Test
|
||||
public void testContains() throws Punycode.PunycodeException {
|
||||
String path = ".*"; // simplest test pattern
|
||||
|
||||
Pattern pattern = Pattern.compile(path, Pattern.CASE_INSENSITIVE);
|
||||
|
||||
// pattern list as in Blacklist class
|
||||
// ConcurrentMap<BlacklistType, Map<String, Set<Pattern>>> hostpaths_matchable;
|
||||
// simulate last part, path pattern set
|
||||
Set<Pattern> hostList = new HashSet<Pattern>();
|
||||
hostList.add(pattern);
|
||||
|
||||
// prove the assumption that a compiled Pattern is not equal to its source string, so Set.contains(path) fails
|
||||
boolean ret = hostList.contains(path);
|
||||
assertFalse("match blacklist pattern " + path, ret);
|
||||
|
||||
// prove that comparing Pattern.pattern() with the path string matches
|
||||
for (Pattern hp : hostList) {
|
||||
String hpxs = hp.pattern();
|
||||
if (hpxs.equals(path)) {
|
||||
ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue("match blacklist pattern " + path, ret);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user