Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Michael Peter Christen 2013-07-30 12:49:14 +02:00
commit 58fe986cca
33 changed files with 164 additions and 260 deletions

View File

@ -25,7 +25,7 @@
<key>Java</key>
<dict>
<key>VMOptions</key>
<string>-Xmx600m -Xms180m -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8</string>
<string>-Xmx600m -Xms180m -Xss256k -XX:MaxPermSize=256m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Dfile.encoding=UTF-8</string>
<key>WorkingDirectory</key>
<string>$APP_PACKAGE/Contents/Resources/Java</string>
<key>MainClass</key>

View File

@ -57,7 +57,7 @@ SHUTDOWN_TIMEOUT=50
# Default niceness if not set in config file
NICE_VAL=0
JAVA_ARGS="-server -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8"
JAVA_ARGS="-server -XX:MaxPermSize=256m -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8"
#check if system supports large memory pages and enable it if possible
HUGEPAGESTOTAL="$(cat /proc/meminfo | grep HugePages_Total | sed s/[^0-9]//g)"
if [ -n "$HUGEPAGESTOTAL" ] && [ $HUGEPAGESTOTAL -ne 0 ]

View File

@ -3,7 +3,7 @@ javacSource=1.6
javacTarget=1.6
# Release Configuration
releaseVersion=1.52
releaseVersion=1.6
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
sourceReleaseFile=yacy_src_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseFileParentDir=yacy

View File

@ -837,7 +837,7 @@ indexControl.gzipBody = true
indexControl.timeout = 60000
indexDistribution.timeout = 60000
indexTransfer.timeout = 120000
indexTransfer.maxload = 2.5
indexTransfer.maxload = 2.2
# defining max. allowed amount of open files during index transfer/distribution
indexDistribution.maxOpenFiles = 800
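
The lowered indexTransfer.maxload value feeds the new load guard added to transferRWI later in this commit. A minimal sketch of how that guard uses the setting, assembled from the transferRWI hunk below (the surrounding switchboard object sb and the assumption that SwitchboardConstants.INDEX_TRANSFER_MAXLOAD maps to this key are not spelled out in the diff):

    double load = Memory.load();                 // current system load average
    float maxload = sb.getConfigFloat(SwitchboardConstants.INDEX_TRANSFER_MAXLOAD, 1.5f);
    if (load > maxload) {
        // reject the incoming RWI transfer and ask the sender to pause in proportion to the load
        result = "not_granted";
        pause = (int) (load * 20000);
    }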

View File

@ -99,4 +99,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = true
greedylearning.limit.doccount = 15000
greedylearning.limit.doccount = 1000

View File

@ -35,4 +35,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false
greedylearning.limit.doccount = 15000
greedylearning.limit.doccount = 1000

View File

@ -94,4 +94,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false
greedylearning.limit.doccount = 15000
greedylearning.limit.doccount = 1000

View File

@ -32,4 +32,4 @@ network.unit.access.blacklist =
# greedy learning: fast information acquisition heuristic for new peers
greedylearning.enabled = false
greedylearning.limit.doccount = 15000
greedylearning.limit.doccount = 1000

View File

@ -30,9 +30,6 @@
// if the shell's current path is HTROOT
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -63,18 +60,13 @@ public class BlacklistCleaner_p {
private static final String BLACKLISTS = "blacklists_";
private static final String ENTRIES = "entries_";
private final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static final Class<?>[] supportedBLEngines = {
Blacklist.class
};
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects();
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS"));
String blacklistToUse = null;
prop.put(DISABLED+"checked", "1");
@ -92,7 +84,7 @@ public class BlacklistCleaner_p {
}
}
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER), blacklistToUse);
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse);
if (blacklistToUse != null) {
prop.put("results", "1");
@ -125,7 +117,7 @@ public class BlacklistCleaner_p {
}
} else {
prop.put("results", "0");
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER), blacklistToUse);
putBlacklists(prop, FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER), blacklistToUse);
}
return prop;
@ -273,26 +265,13 @@ public class BlacklistCleaner_p {
* @return Length of the list of entries to be removed.
*/
private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
boolean listChanged = false;
// delete the old entry from file
for (final String entry : entries) {
String s = entry;
if (list != null){
// get rid of escape characters which make it impossible to
// properly use contains()
if (s.contains("\\\\")) {
s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\"));
}
if (list.contains(s)) {
listChanged = list.remove(s);
}
// get rid of escape characters which make it impossible to
// properly use contains()
if (s.contains("\\\\")) {
s = s.replaceAll(Pattern.quote("\\\\"), Matcher.quoteReplacement("\\"));
}
// remove the entry from the running blacklist engine
@ -301,7 +280,7 @@ public class BlacklistCleaner_p {
final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0));
final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1);
try {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
Switchboard.urlBlacklist.remove(supportedBlacklistType, blacklistToUse, host, path);
} catch (final RuntimeException e) {
ConcurrentLog.severe("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path);
}
@ -309,9 +288,6 @@ public class BlacklistCleaner_p {
}
SearchEventCache.cleanupEvents(true);
}
if (listChanged){
FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
}
return entries.length;
}
@ -329,34 +305,23 @@ public class BlacklistCleaner_p {
final String[] oldEntry,
final String[] newEntry) {
removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
PrintWriter pw = null;
try {
pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true));
String host, path;
for (final String n : newEntry) {
final int pos = n.indexOf('/',0);
if (pos < 0) {
host = n;
path = ".*";
} else {
host = n.substring(0, pos);
path = n.substring(pos + 1);
}
pw.println(host + "/" + path);
for (final BlacklistType s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(
s,
host,
path);
}
}
SearchEventCache.cleanupEvents(true);
}
pw.close();
} catch (final IOException e) {
ConcurrentLog.severe("BLACKLIST-CLEANER", "error on writing altered entries to blacklist", e);
}
String host, path;
for (final String n : newEntry) {
final int pos = n.indexOf('/',0);
if (pos < 0) {
host = n;
path = ".*";
} else {
host = n.substring(0, pos);
path = n.substring(pos + 1);
}
for (final BlacklistType s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(s, blacklistToUse, host, path);
}
}
SearchEventCache.cleanupEvents(true);
}
return newEntry.length;
}
}

View File

@ -29,12 +29,12 @@
// javac -classpath .:../classes Blacklist_p.java
// if the shell's current path is HTROOT
import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.ListManager;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed;
@ -49,12 +49,8 @@ public class BlacklistImpExp_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, @SuppressWarnings("unused") final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// loading all blacklist files located in the directory
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath);
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
String blacklistToUse = null;
final serverObjects prop = new serverObjects();
@ -84,7 +80,10 @@ public class BlacklistImpExp_p {
hostList.remove(peername);
peerCount++;
}
} catch (final Exception e) {/* */}
} catch (final Exception e) {
// Log exception for debug purposes ("catch-all catch")
ConcurrentLog.logException(e);
}
prop.put(DISABLED + "otherHosts", peerCount);
}
@ -93,10 +92,8 @@ public class BlacklistImpExp_p {
int count = 0;
for (String element : dirlist) {
if (element.endsWith(".black")) {
prop.putHTML("blackListNames_" + count + "_blackListName", element);
count++;
}
prop.putHTML("blackListNames_" + count + "_blackListName", element);
count++;
}
prop.put("blackListNames", count);

View File

@ -29,11 +29,9 @@
// javac -classpath .:../classes Blacklist_p.java
// if the shell's current path is HTROOT
import java.io.File;
import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.ListManager;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
@ -43,11 +41,7 @@ import net.yacy.server.serverSwitch;
public class BlacklistTest_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects();
prop.putHTML("blacklistEngine", Blacklist.getEngineInfo());

View File

@ -57,11 +57,7 @@ public class Blacklist_p {
private final static String BLACKLIST_SHARED = "BlackLists.Shared";
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
public static serverObjects respond(final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
// load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
@ -549,14 +545,8 @@ public class Blacklist_p {
}
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (Switchboard.urlBlacklist.getFileName(supportedBlacklistType) != blacklistToUse) {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
}
else {
Blacklist bl = new Blacklist(ListManager.listsPath);
bl.loadList(supportedBlacklistType, blacklistToUse, "/");
bl.remove(host, path);
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.remove(supportedBlacklistType, blacklistToUse, host, path);
}
}
@ -618,14 +608,8 @@ public class Blacklist_p {
String path = newEntry.substring(pos + 1);
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (Switchboard.urlBlacklist.getFileName(supportedBlacklistType) == blacklistToUse) {
Switchboard.urlBlacklist.add(supportedBlacklistType, host, path);
}
else {
Blacklist bl = new Blacklist(ListManager.listsPath);
bl.loadList(supportedBlacklistType, blacklistToUse, "/");
bl.add(supportedBlacklistType, host, path);
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(supportedBlacklistType, blacklistToUse, host, path);
}
}

View File

@ -44,6 +44,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -57,7 +58,7 @@ public class ConfigAppearance_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
final String skinPath = new File(env.getDataPath(), env.getConfig("skinPath", "DATA/SKINS")).toString();
final String skinPath = new File(env.getDataPath(), env.getConfig("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT)).toString();
// Fallback
prop.put("currentskin", "");

View File

@ -182,7 +182,12 @@ public class CrawlResults {
while (i.hasNext()) {
entry = i.next();
try {
urle = sb.index.fulltext().getMetadata(UTF8.getBytes(entry.getKey()));
byte[] urlhash = UTF8.getBytes(entry.getKey());
urle = sb.index.fulltext().getMetadata(urlhash);
if (urle == null) {
sb.index.fulltext().commit(true);
urle = sb.index.fulltext().getMetadata(urlhash);
}
if (urle == null) {
ConcurrentLog.warn("PLASMA", "CrawlResults: URL not in index with url hash " + entry.getKey());
urlstr = null;

View File

@ -25,10 +25,7 @@
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
@ -367,72 +364,58 @@ public class IndexControlRWIs_p {
URIMetadataRow.rowdef.objectOrder,
urlb.size());
if ( post.containsKey("blacklisturls") ) {
PrintWriter pw;
try {
final String[] supportedBlacklistTypes =
env.getConfig("BlackLists.types", "").split(",");
pw =
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
DigestURI url;
for ( final byte[] b : urlb ) {
try {
urlHashes.put(b);
} catch (final SpaceExceededException e ) {
ConcurrentLog.logException(e);
}
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( url != null ) {
pw.println(url.getHost() + "/" + url.getFile());
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
BlacklistType.valueOf(supportedBlacklistType),
url.getHost(),
url.getFile());
}
}
SearchEventCache.cleanupEvents(true);
}
}
pw.close();
} catch (final IOException e ) {
}
final String[] supportedBlacklistTypes =
env.getConfig("BlackLists.types", "").split(",");
DigestURI url;
for ( final byte[] b : urlb ) {
try {
urlHashes.put(b);
} catch (final SpaceExceededException e ) {
ConcurrentLog.logException(e);
}
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( url != null ) {
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
BlacklistType.valueOf(supportedBlacklistType),
blacklist,
url.getHost(),
url.getFile());
}
}
SearchEventCache.cleanupEvents(true);
}
}
}
if ( post.containsKey("blacklistdomains") ) {
PrintWriter pw;
try {
pw =
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
DigestURI url;
for ( final byte[] b : urlb ) {
try {
urlHashes.put(b);
} catch (final SpaceExceededException e ) {
ConcurrentLog.logException(e);
}
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( url != null ) {
pw.println(url.getHost() + "/.*");
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
supportedBlacklistType,
url.getHost(),
".*");
}
}
}
}
pw.close();
} catch (final IOException e ) {
}
DigestURI url;
for ( final byte[] b : urlb ) {
try {
urlHashes.put(b);
} catch (final SpaceExceededException e ) {
ConcurrentLog.logException(e);
}
url = segment.fulltext().getURL(b);
segment.fulltext().remove(b);
if ( url != null ) {
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
supportedBlacklistType,
blacklist,
url.getHost(),
".*");
}
}
}
}
}
try {
segment.termIndex().remove(keyhash, urlHashes);

View File

@ -13,7 +13,6 @@ public class blacklists {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects();
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath);
int blacklistCount = 0;

View File

@ -5,6 +5,7 @@ import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.ListManager;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -15,8 +16,7 @@ public class blacklists_p {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
final serverObjects prop = new serverObjects();
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath);
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
int blacklistCount=0;
final String blackListName = (post == null) ? "" : post.get("listname", "");

View File

@ -30,9 +30,7 @@
//if the shell's current path is HTROOT
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
@ -50,6 +48,7 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
@ -69,8 +68,6 @@ public class sharedBlacklist_p {
public static final int STATUS_WRONG_INVOCATION = 5;
public static final int STATUS_PARSE_ERROR = 6;
private final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
// return variable that accumulates replacements
@ -89,13 +86,8 @@ public class sharedBlacklist_p {
if (post != null) {
// initialize the list manager
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// loading all blacklist files located in the directory
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, BLACKLIST_FILENAME_FILTER);
final List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
// List BlackLists
int blacklistCount = 0;
@ -210,11 +202,7 @@ public class sharedBlacklist_p {
prop.put("page", "1"); //result page
prop.put("status", STATUS_ENTRIES_ADDED); //list of added Entries
PrintWriter pw = null;
try {
// open the blacklist file
pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, selectedBlacklistName), true));
// loop through the received entry list
final int num = post.getInt("num", 0);
for(int i = 0; i < num; i++){
@ -234,13 +222,10 @@ public class sharedBlacklist_p {
newItem = newItem + "/.*";
}
// append the item to the file
pw.println(newItem);
if (Switchboard.urlBlacklist != null) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) {
Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1));
Switchboard.urlBlacklist.add(supportedBlacklistType,selectedBlacklistName,newItem.substring(0, pos), newItem.substring(pos + 1));
}
}
SearchEventCache.cleanupEvents(true);
@ -250,8 +235,6 @@ public class sharedBlacklist_p {
} catch (final Exception e) {
prop.put("status", "1");
prop.putHTML("status_error", e.getLocalizedMessage());
} finally {
if (pw != null) try { pw.close(); } catch (final Exception e){ /* */}
}
/* unable to use prop.putHTML() or prop.putXML() here because they

View File

@ -40,6 +40,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.Memory;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReferenceRow;
@ -105,7 +106,14 @@ public final class transferRWI {
String result = "ok";
final StringBuilder unknownURLs = new StringBuilder(6000);
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
double load = Memory.load();
float maxload = sb.getConfigFloat(SwitchboardConstants.INDEX_TRANSFER_MAXLOAD, 1.5f);
if (load > maxload) {
// too high local load. this is bad but we must reject this to protect ourselves!
sb.getLog().info("Rejecting RWIs from peer " + otherPeerName + ", system has too high load = " + load + ", maxload = " + maxload);
result = "not_granted";
pause = (int) (load * 20000);
} else if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
sb.getLog().info("Rejecting RWIs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target";
pause = 0;

View File

@ -578,7 +578,7 @@ public class Balancer {
Map.Entry<String, byte[]> hosthash;
while (k.hasNext()) {
hosthash = k.next();
if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this!
//if (failoverCandidates.get(hosthash) > 2000) break; // thats too long; we want a second chance for this!
besthost = hosthash.getKey();
besturlhash = hosthash.getValue();
removeHashFromDomainStacks(besthost, besturlhash);

View File

@ -37,11 +37,6 @@ public class AugmentParser extends AbstractParser implements Parser {
public Document[] parse(DigestURI url, String mimeType, String charset, InputStream source) throws Parser.Failure, InterruptedException {
Document[] htmlDocs = this.rdfaParser.parse(url, mimeType, charset, source);
try {
source.reset();
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
for (final Document doc : htmlDocs) {
/* analyze(doc, url, mimeType, charset); // enrich document text */

View File

@ -206,7 +206,7 @@ public class htmlParser extends AbstractParser implements Parser {
throw new Parser.Failure("IO error:" + e.getMessage(), location);
} finally {
writer.flush();
sourceStream.close();
//sourceStream.close(); keep open for multiple parsing (close done by caller)
writer.close();
}
//OutputStream hfos = new htmlFilterOutputStream(null, scraper, null, false);

View File

@ -38,6 +38,7 @@ public class RDFaTripleImpl{
TransformerException, TransformerConfigurationException {
BufferedReader bufReader = new BufferedReader(in);
bufReader.mark(2048); // mark position for following reset
String readLine = bufReader.readLine();
if (!readLine.toLowerCase().contains("<!doctype")){
bufReader.reset();

View File

@ -251,6 +251,7 @@ public class Tables implements Iterable<String> {
try {
getHeap(tablename);
} catch (final IOException e) {
ConcurrentLog.logException(e);
}
}
}

View File

@ -99,7 +99,7 @@ public class migration {
* copy skins from the release to DATA/SKINS.
*/
public static void installSkins(final Switchboard sb){
final File skinsPath = sb.getDataPath("skinPath", "DATA/SKINS");
final File skinsPath = sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT);
final File defaultSkinsPath = new File(sb.getAppPath(), "skins");
if (defaultSkinsPath.exists()) {
final List<String> skinFiles = FileUtils.getDirListing(defaultSkinsPath.getAbsolutePath());
@ -118,7 +118,7 @@ public class migration {
if(skin.equals("")){
skin="default";
}
final File skinsDir=sb.getDataPath("skinPath", "DATA/SKINS");
final File skinsDir=sb.getDataPath("skinPath", SwitchboardConstants.SKINS_PATH_DEFAULT);
final File skinFile=new File(skinsDir, skin+".css");
final File htdocsPath=new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT), "env");
final File styleFile=new File(htdocsPath, "style.css");
@ -149,7 +149,7 @@ public class migration {
}
}
public static void migrateBookmarkTagsDB(final Switchboard sb){
sb.bookmarksDB.close();
if (sb.bookmarksDB != null) sb.bookmarksDB.close();
final File tagsDBFile=new File(sb.workPath, "bookmarkTags.db");
if(tagsDBFile.exists()){
delete(tagsDBFile);

View File

@ -56,6 +56,7 @@ import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.SetTools;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
public class Blacklist {
@ -131,7 +132,7 @@ public class Blacklist {
ConcurrentLog.fine("Blacklist", "All blacklists has been shutdown.");
}
public final void setRootPath(final File rootPath) {
private final void setRootPath(final File rootPath) {
if (rootPath == null) {
throw new NullPointerException("The blacklist root path must not be null.");
}
@ -153,10 +154,6 @@ public class Blacklist {
return this.cachedUrlHashs.get(blacklistType);
}
public final String getFileName(BlacklistType type) {
return blacklistFiles.get(type);
}
public final File getRootPath() {
return blacklistRootPath;
}
@ -172,8 +169,6 @@ public class Blacklist {
entry.clear();
}
blacklistFiles.clear();
blacklistRootPath = null;
}
public final int size() {
@ -270,16 +265,7 @@ public class Blacklist {
getBlacklistMap(blacklistType, false).remove(host);
}
/**
* Removes entry for all blacklist types.
*/
public final void remove(final String host, final String path) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
}
}
public final void remove(final BlacklistType blacklistType, final String host, final String path) {
public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
Set<Pattern> hostList = blacklistMap.get(host);
@ -300,7 +286,7 @@ public class Blacklist {
}
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, getFileName(blacklistType)));
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
// delete the old entry from file
if (list != null) {
@ -310,20 +296,11 @@ public class Blacklist {
break;
}
}
FileUtils.writeList(new File(ListManager.listsPath, getFileName(blacklistType)), list.toArray(new String[list.size()]));
FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
}
}
/**
* Adds a new blacklist entry for all types.
*/
public final void add(final String host, final String path) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
add(supportedBlacklistType, host, path);
}
}
public final void add(final BlacklistType blacklistType, final String host, final String path) {
public final void add(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {
if (contains(blacklistType, host, path)) {
return;
}
@ -354,12 +331,13 @@ public class Blacklist {
// Append the line to the file.
PrintWriter pw = null;
try {
try {
final String newEntry = h + "/" + pattern;
if (!blacklistFileContains(blacklistRootPath,
getFileName(blacklistType), pattern.toString())) {
blacklistToUse, newEntry)) {
pw = new PrintWriter(new FileWriter(new File(blacklistRootPath,
getFileName(blacklistType)), true));
pw.println(pattern);
blacklistToUse), true));
pw.println(newEntry);
pw.close();
}
} catch (final IOException e) {
@ -370,7 +348,7 @@ public class Blacklist {
pw.close();
} catch (final Exception e) {
ConcurrentLog.warn("Blacklist", "could not close stream to " +
getFileName(blacklistType) + "! " + e.getMessage());
blacklistToUse + "! " + e.getMessage());
}
}
}
@ -630,7 +608,7 @@ public class Blacklist {
}
private static File DHTCacheFile(final BlacklistType type) {
String BLACKLIST_DHT_CACHEFILE_NAME = "DATA/LISTS/blacklist_" + type.name() + "_Cache.ser";
final String BLACKLIST_DHT_CACHEFILE_NAME = SwitchboardConstants.LISTS_PATH_DEFAULT + "/blacklist_" + type.name() + "_Cache.ser";
return new File(Switchboard.getSwitchboard().dataPath, BLACKLIST_DHT_CACHEFILE_NAME);
}
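
With these changes, callers must name the blacklist file they are editing; the engine no longer derives it from getFileName(). A minimal usage sketch of the new add/remove signatures (the BlacklistType constant CRAWLER and the file name url.black are illustrative placeholders, not taken from this commit):

    // add and remove a pattern in a specific blacklist file under DATA/LISTS
    Switchboard.urlBlacklist.add(BlacklistType.CRAWLER, "url.black", "example.org", ".*");
    Switchboard.urlBlacklist.remove(BlacklistType.CRAWLER, "url.black", "example.org", ".*");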

View File

@ -1045,7 +1045,7 @@ public final class Switchboard extends serverSwitch {
Long.MAX_VALUE,
30000,
Long.MAX_VALUE),
8000);
10000);
deployThread(
SwitchboardConstants.INDEX_DIST,
"DHT Distribution",
@ -2720,7 +2720,7 @@ public final class Switchboard extends serverSwitch {
return;
}
if ( !profile.indexText() && !profile.indexMedia() ) {
if ( profile != null && !profile.indexText() && !profile.indexMedia() ) {
//if (this.log.isInfo()) log.logInfo("Not Indexed Resource '" + queueEntry.url().toNormalform(false, true) + "': denied by profile rule, process case=" + processCase + ", profile name = " + queueEntry.profile().name());
addURLtoErrorDB(
url,
@ -3475,7 +3475,6 @@ public final class Switchboard extends serverSwitch {
}
}
} catch (final Throwable e) {
ConcurrentLog.logException(e);
}
}
}.start();

View File

@ -521,4 +521,9 @@ public final class SwitchboardConstants {
public static final String GREEDYLEARNING_ENABLED = "greedylearning.enabled";
public static final String GREEDYLEARNING_LIMIT_DOCCOUNT = "greedylearning.limit.doccount";
public static final String GREEDYLEARNING_ACTIVE = "greedylearning.active";
/*
* Skins
*/
public static final String SKINS_PATH_DEFAULT = "DATA/SKINS";
}

View File

@ -133,6 +133,7 @@ import org.apache.solr.common.SolrInputDocument;
}
}
} catch (final IOException ex) {
ConcurrentLog.warn("MIGRATION-REINDEX", "remove following query from list due to error, q=" + querylist.remove(0));
ConcurrentLog.logException(ex);
} finally {
sem.release();

View File

@ -201,7 +201,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
final DigestURI digestURI = md.url();
boolean allAttr = this.isEmpty();
if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, "");
add(doc, CollectionSchema.id, ASCII.String(md.hash()));
String us = digestURI.toNormalform(true);
add(doc, CollectionSchema.sku, us);
@ -354,7 +353,6 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
Set<ProcessType> processTypes = new LinkedHashSet<ProcessType>();
add(doc, CollectionSchema.id, id);
if (allAttr || contains(CollectionSchema.failreason_s)) add(doc, CollectionSchema.failreason_s, ""); // overwrite a possible fail reason (in case that there was a fail reason before)
String docurl = digestURI.toNormalform(true);
add(doc, CollectionSchema.sku, docurl);
@ -1005,11 +1003,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
BlockingQueue<String> ids = connector.concurrentIDsByQuery(CollectionSchema.host_s.getSolrFieldName() + ":\"" + host + "\"", 0, 1000000, 600000);
String id;
while ((id = ids.take()) != AbstractSolrConnector.POISON_ID) {
crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}
this.crt.put(ASCII.getBytes(id), new double[]{0.0d,0.0d}); //{old value, new value}
}
} catch (final InterruptedException e2) {
}
this.cr_host_count = crt.size();
this.cr_host_count = this.crt.size();
double initval = 1.0d / cr_host_count;
for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) entry.getValue()[0] = initval;
this.internal_links_counter = new RowHandleMap(12, Base64Order.enhancedCoder, 8, 100, "internal_links_counter");
@ -1019,8 +1017,8 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* @return
*/
public Map<byte[], CRV> normalize() {
TreeMap<Double, List<byte[]>> reorder = new TreeMap<Double, List<byte[]>>();
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) {
final TreeMap<Double, List<byte[]>> reorder = new TreeMap<Double, List<byte[]>>();
for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
Double d = entry.getValue()[0];
List<byte[]> ds = reorder.get(d);
if (ds == null) {ds = new ArrayList<byte[]>(); reorder.put(d, ds);}
@ -1103,7 +1101,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
boolean convergence = true;
double df = (1.0d - damping) / this.cr_host_count;
try {
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) {
for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
byte[] id = entry.getKey();
ReferenceReport rr = this.rrCache.getReferenceReport(id, false);
// sum up the cr of the internal links
@ -1112,7 +1110,14 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
for (byte[] iid: iids) {
int ilc = getInternalLinks(iid);
if (ilc > 0) { // if (ilc == 0) then the reference report is wrong!
ncr += this.crt.get(iid)[0] / ilc;
double[] d = this.crt.get(iid);
// d[] could be empty in some situations
if (d.length > 0) {
ncr += d[0] / ilc;
} else {
// Output a warning that d[] is empty
ConcurrentLog.warn("COLLECTION", "d[] is empty, iid=" + iid);
}
}
}
ncr = df + damping * ncr;
@ -1120,7 +1125,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
entry.getValue()[1] = ncr;
}
// after the loop, replace the old value with the new value in crt
for (Map.Entry<byte[], double[]> entry: crt.entrySet()) {
for (Map.Entry<byte[], double[]> entry: this.crt.entrySet()) {
entry.getValue()[0] = entry.getValue()[1];
}
} catch (final IOException e) {
@ -1189,7 +1194,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
* @param httpstatus
* @throws IOException
*/
public SolrInputDocument err(final DigestURI digestURI, String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
public SolrInputDocument err(final DigestURI digestURI, final String[] collections, final String failReason, final FailType failType, final int httpstatus) throws IOException {
final SolrInputDocument solrdoc = new SolrInputDocument();
add(solrdoc, CollectionSchema.id, ASCII.String(digestURI.hash()));
add(solrdoc, CollectionSchema.sku, digestURI.toNormalform(true));

View File

@ -18,7 +18,7 @@ if exist DATA\SETTINGS\httpProxy.conf GoTo :RENAMEINDEX
if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS
:STARTJAVA
set javacmd=%javacmd% -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
set javacmd=%javacmd% -XX:-UseGCOverheadLimit -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
Rem Starting YaCy
Echo Generated classpath:%CLASSPATH%
Echo JRE Parameters:%javacmd%

View File

@ -6,7 +6,7 @@ PIDFILE="yacy.pid"
OS="`uname`"
#get javastart args
JAVA_ARGS="-server -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8";
JAVA_ARGS="-server -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8";
#JAVA_ARGS="-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDetails $JAVA_ARGS";
#check if OS is Sun Solaris or one of the OpenSolaris distributions and use different version of id if necessary

View File

@ -20,7 +20,7 @@ if exist DATA\SETTINGS\httpProxy.conf GoTo :RENAMEINDEX
if exist DATA\SETTINGS\yacy.conf GoTo :GETSTARTOPTS
:STARTJAVA
set javacmd=%javacmd% -Xss256k -XX:MaxPermSize=256m -XX:ReservedCodeCacheSize=1024m -XX:-UseGCOverheadLimit -XX:+UseAdaptiveSizePolicy -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
set javacmd=%javacmd% -XX:-UseGCOverheadLimit -Djava.net.preferIPv4Stack=true -Djava.awt.headless=true -Dfile.encoding=UTF-8
Rem Starting YaCy
Echo Generated classpath:%CLASSPATH%
Echo JRE Parameters:%javacmd%