Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Conflicts:
	source/net/yacy/search/Switchboard.java
This commit is contained in:
Michael Peter Christen 2012-01-05 18:37:46 +01:00
commit 2ee8cbeb2c
71 changed files with 4040 additions and 1625 deletions

23
bin/checkalive.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/bash
# Watchdog for a local YaCy peer: probes the Status page once and restarts
# the peer when the probe output does not contain the HTTP status code 200.
#
# Install by adding this line to /etc/crontab:
# 0 * * * * yacy cd /home/yacy/production/bin && ./checkalive.sh
#
# Optional environment overrides (defaults reproduce the stock setup):
#   CHECK_URL      page to probe
#   CHECK_TIMEOUT  wget network timeout in seconds
#   CHECK_TRIES    number of wget attempts before giving up
CHECK_URL="${CHECK_URL:-http://localhost:8090/Status.html}"
CHECK_TIMEOUT="${CHECK_TIMEOUT:-60}"
CHECK_TRIES="${CHECK_TRIES:-3}"

# --spider only checks that the page is reachable; wget reports on stderr,
# hence the 2>&1. The explicit timeout/tries keep a hung (accepting but not
# answering) peer from stalling the watchdog itself.
RESULT=$(wget --spider --timeout="$CHECK_TIMEOUT" --tries="$CHECK_TRIES" "$CHECK_URL" 2>&1)

# The peer counts as alive when "200" appears as a whitespace-delimited word
# in the wget output. Matching with a regex instead of an unquoted
# `for x in $RESULT` loop avoids pathname expansion of the output.
FLAG=0
ok_pattern='(^|[[:space:]])200([[:space:]]|$)'
if [[ $RESULT =~ $ok_pattern ]]; then
    FLAG=1
fi

if [ "$FLAG" -eq 0 ]; then
    # The start/stop scripts and DATA/ live one level above this script.
    # Resolve that from the script location and abort if it is unreachable,
    # so the rm and restart never run in an unrelated directory.
    cd -- "$(dirname -- "$0")/.." || exit 1
    # Give the regular shutdown 30 seconds, then force-kill whatever is left.
    timeout 30s ./stopYACY.sh
    ./killYACY.sh
    # -f: the lock file may already be gone after a clean stop.
    rm -f DATA/yacy.running
    ./startYACY.sh
fi
exit

View File

@ -41,18 +41,22 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/** draw a banner with information about the peer */
public class Banner {
public class Banner
{
public static RasterPlotter respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws IOException {
public static RasterPlotter respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) throws IOException {
final Switchboard sb = (Switchboard) env;
final String IMAGE = "htroot/env/grafics/yacy.png";
int width = 468;
int height = 60;
String bgcolor = "e7effc";
String textcolor = "000000";
String bgcolor = "e7effc";
String textcolor = "000000";
String bordercolor = "5090d0";
if (post != null) {
if ( post != null ) {
bgcolor = post.get("bgcolor", bgcolor);
textcolor = post.get("textcolor", textcolor);
bordercolor = post.get("bordercolor", bordercolor);
@ -60,41 +64,41 @@ public class Banner {
height = post.getInt("heigth", height);
}
String name = "";
long links = 0;
long words = 0;
int myppm = 0;
double myqph = 0;
String type = "";
String name = "";
long links = 0;
long words = 0;
int myppm = 0;
double myqph = 0;
String type = "";
final String network = env.getConfig(SwitchboardConstants.NETWORK_NAME, "unspecified").toUpperCase();
final int peers = sb.peers.sizeConnected() + 1; // the '+ 1': the own peer is not included in sizeConnected()
long nlinks = sb.peers.countActiveURL();
long nwords = sb.peers.countActiveRWI();
final double nqpm = sb.peers.countActiveQPM();
long nppm = sb.peers.countActivePPM();
double nqph = 0;
final int peers = sb.peers.sizeConnected() + 1; // the '+ 1': the own peer is not included in sizeConnected()
long nlinks = sb.peers.countActiveURL();
long nwords = sb.peers.countActiveRWI();
final double nqpm = sb.peers.countActiveQPM();
long nppm = sb.peers.countActivePPM();
double nqph = 0;
final Seed seed = sb.peers.mySeed();
if (seed != null){
name = seed.get(Seed.NAME, "-").toUpperCase();
links = seed.getLinkCount();
words = seed.getWordCount();
myppm = seed.getPPM();
myqph = 60d * seed.getQPM();
if ( seed != null ) {
name = seed.get(Seed.NAME, "-").toUpperCase();
links = seed.getLinkCount();
words = seed.getWordCount();
myppm = seed.getPPM();
myqph = 60d * seed.getQPM();
if (sb.peers.mySeed().isVirgin()) {
if ( sb.peers.mySeed().isVirgin() ) {
type = "VIRGIN";
nqph = Math.round(6000d * nqpm) / 100d;
} else if(sb.peers.mySeed().isJunior()) {
} else if ( sb.peers.mySeed().isJunior() ) {
type = "JUNIOR";
nqph = Math.round(6000d * nqpm) / 100d;
} else if(sb.peers.mySeed().isSenior()) {
} else if ( sb.peers.mySeed().isSenior() ) {
type = "SENIOR";
nlinks = nlinks + links;
nwords = nwords + words;
nqph = Math.round(6000d * nqpm + 100d * myqph) / 100d;
nppm = nppm + myppm;
} else if(sb.peers.mySeed().isPrincipal()) {
} else if ( sb.peers.mySeed().isPrincipal() ) {
type = "PRINCIPAL";
nlinks = nlinks + links;
nwords = nwords + words;
@ -103,13 +107,48 @@ public class Banner {
}
}
if (!NetworkGraph.logoIsLoaded()) {
if ( !NetworkGraph.logoIsLoaded() ) {
ImageIO.setUseCache(false); // do not write a cache to disc; keep in RAM
final BufferedImage logo = ImageIO.read(new File(IMAGE));
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm, logo);
return NetworkGraph.getBannerPicture(
1000,
width,
height,
bgcolor,
textcolor,
bordercolor,
name,
links,
words,
type,
myppm,
network,
peers,
nlinks,
nwords,
nqph,
nppm,
logo);
}
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm);
return NetworkGraph.getBannerPicture(
1000,
width,
height,
bgcolor,
textcolor,
bordercolor,
name,
links,
words,
type,
myppm,
network,
peers,
nlinks,
nwords,
nqph,
nppm);
}
}

0
htroot/Collage.html Executable file → Normal file
View File

0
htroot/Collage.java Executable file → Normal file
View File

View File

@ -163,23 +163,9 @@
</dt>
<dd>Your search engine will not contact any other peer, and will reject every request.
</dd>
<!-- not yet implemented
<dt>
<label for="cluster.modePrivatecluster">Private Cluster</label>
<input type="radio" value="privatecluster" id="cluster.modePrivatecluster" name="cluster.mode"
#(privateclusterChecked)#::checked="checked" #(/privateclusterChecked)#/>
</dt>
<dd>
Your peer is part of a private cluster without public visibility.<br />
Index data is not distributed, but remote crawl requests are distributed and accepted from your cluster.<br />
Search requests are spread over all peers of the cluster, and answered from all peers of the cluster.<br />
List of ip:port - addresses of the cluster: (comma-separated)<br />
<input type="text" name="cluster.peers.ipport" value="#[cluster.peers.ipport]#" size="80" maxlength="800" />
</dd>
-->
<dt>
<label for="cluster.modePublicpeer">Public Peer</label>
<input type="radio" value="publicpeer" id="cluster.modePublicpeer" name="cluster.mode"
<label for="publicpeer">Public Peer</label>
<input type="radio" value="publicpeer" id="publicpeer" name="cluster.mode"
#(publicpeerChecked)#::checked="checked" #(/publicpeerChecked)#/>
</dt>
<dd>
@ -187,8 +173,8 @@
Your peer does not accept any outside index data, but responds on all remote search requests.
</dd>
<dt>
<label for="cluster.modePubliccluster">Public Cluster</label>
<input type="radio" value="publiccluster" id="cluster.modePubliccluster" name="cluster.mode"
<label for="publiccluster">Public Cluster</label>
<input type="radio" value="publiccluster" id="publiccluster" name="cluster.mode"
#(publicclusterChecked)#::checked="checked" #(/publicclusterChecked)#/>
</dt>
<dd>

View File

@ -40,26 +40,36 @@ import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class ConfigNetwork_p {
public class ConfigNetwork_p
{
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws FileNotFoundException, IOException {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) throws FileNotFoundException, IOException {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
int commit = 0;
// load all options for network definitions
final File networkBootstrapLocationsFile = new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
final File networkBootstrapLocationsFile =
new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
final Set<String> networkBootstrapLocations = FileUtils.loadList(networkBootstrapLocationsFile);
if (post != null) {
if ( post != null ) {
// store this call as api call
sb.tables.recordAPICall(post, "ConfigNetwork_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "network settings");
sb.tables.recordAPICall(
post,
"ConfigNetwork_p.html",
WorkTables.TABLE_API_TYPE_CONFIGURATION,
"network settings");
if (post.containsKey("changeNetwork")) {
final String networkDefinition = post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
if (networkDefinition.equals(sb.getConfig("network.unit.definition", ""))) {
if ( post.containsKey("changeNetwork") ) {
final String networkDefinition =
post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
if ( networkDefinition.equals(sb.getConfig("network.unit.definition", "")) ) {
// no change
commit = 3;
} else {
@ -69,49 +79,55 @@ public class ConfigNetwork_p {
}
}
if (post.containsKey("save")) {
if ( post.containsKey("save") ) {
// DHT control
boolean indexDistribute = "on".equals(post.get("indexDistribute", ""));
boolean indexReceive = "on".equals(post.get("indexReceive", ""));
if ( !indexReceive ) {
// remove heuristics
sb.setConfig("heuristic.site", false);
sb.setConfig("heuristic.scroogle", false);
sb.setConfig("heuristic.blekko", false);
}
final boolean robinsonmode = "robinson".equals(post.get("network", ""));
if (robinsonmode) {
if ( robinsonmode ) {
indexDistribute = false;
indexReceive = false;
commit = 1;
} else {
if (!indexDistribute && !indexReceive) {
if ( !indexDistribute && !indexReceive ) {
prop.put("commitDHTIsRobinson", "1");
commit = 2;
} else if (indexDistribute && indexReceive) {
} else if ( indexDistribute && indexReceive ) {
commit = 1;
} else {
if (!indexReceive) {
if ( !indexReceive ) {
prop.put("commitDHTNoGlobalSearch", "1");
}
commit = 1;
}
}
if (indexDistribute) {
if ( indexDistribute ) {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW, true);
} else {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW, false);
}
if ("on".equals(post.get("indexDistributeWhileCrawling",""))) {
if ( "on".equals(post.get("indexDistributeWhileCrawling", "")) ) {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true);
} else {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, false);
}
if ("on".equals(post.get("indexDistributeWhileIndexing",""))) {
if ( "on".equals(post.get("indexDistributeWhileIndexing", "")) ) {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true);
} else {
sb.setConfig(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, false);
}
if (indexReceive) {
if ( indexReceive ) {
sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
sb.peers.mySeed().setFlagAcceptRemoteIndex(true);
} else {
@ -120,19 +136,24 @@ public class ConfigNetwork_p {
sb.setConfig(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, false);
}
if ("on".equals(post.get("indexReceiveBlockBlacklist", ""))) {
if ( "on".equals(post.get("indexReceiveBlockBlacklist", "")) ) {
sb.setConfig("indexReceiveBlockBlacklist", true);
} else {
sb.setConfig("indexReceiveBlockBlacklist", false);
}
if (post.containsKey("peertags")) {
sb.peers.mySeed().setPeerTags(MapTools.string2set(normalizedList(post.get("peertags")), ","));
if ( post.containsKey("peertags") ) {
sb.peers.mySeed().setPeerTags(
MapTools.string2set(normalizedList(post.get("peertags")), ","));
}
sb.setConfig("cluster.mode", post.get("cluster.mode", "publicpeer"));
sb.setConfig("cluster.mode", post.get(
SwitchboardConstants.CLUSTER_MODE,
SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER));
sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", "")));
sb.setConfig("cluster.peers.yacydomain", checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
sb.setConfig(
"cluster.peers.yacydomain",
checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
// update the cluster hash set
sb.clusterhashes = sb.peers.clusterHashes(sb.getConfig("cluster.peers.yacydomain", ""));
@ -144,20 +165,34 @@ public class ConfigNetwork_p {
// write remote crawl request settings
prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? "1" : "0");
final long RTCbusySleep = Math.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
final long RTCbusySleep =
Math
.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
final int RTCppm = (int) (60000L / RTCbusySleep);
prop.put("acceptCrawlLimit", RTCppm);
final boolean indexDistribute = sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW, true);
final boolean indexReceive = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
prop.put("indexDistributeChecked", (indexDistribute) ? "1" : "0");
prop.put("indexDistributeWhileCrawling.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
prop.put("indexDistributeWhileCrawling.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
prop.put("indexDistributeWhileIndexing.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
prop.put("indexDistributeWhileIndexing.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
prop.put(
"indexDistributeWhileCrawling.on",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
prop.put(
"indexDistributeWhileCrawling.off",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
prop.put(
"indexDistributeWhileIndexing.on",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
prop.put(
"indexDistributeWhileIndexing.off",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
prop.put("indexReceiveChecked", (indexReceive) ? "1" : "0");
prop.put("indexReceiveBlockBlacklistChecked.on", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
prop.put("indexReceiveBlockBlacklistChecked.off", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
prop.put(
"indexReceiveBlockBlacklistChecked.on",
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
prop.put(
"indexReceiveBlockBlacklistChecked.off",
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
prop.putHTML("peertags", MapTools.set2string(sb.peers.mySeed().getPeerTags(), ",", false));
// set seed information directly
@ -170,20 +205,25 @@ public class ConfigNetwork_p {
prop.putHTML("cluster.peers.ipport", sb.getConfig("cluster.peers.ipport", ""));
prop.putHTML("cluster.peers.yacydomain", sb.getConfig("cluster.peers.yacydomain", ""));
StringBuilder hashes = new StringBuilder();
for (final byte[] h : sb.clusterhashes.keySet()) {
for ( final byte[] h : sb.clusterhashes.keySet() ) {
hashes.append(", ").append(ASCII.String(h));
}
if (hashes.length() > 2) {
if ( hashes.length() > 2 ) {
hashes = hashes.delete(0, 2);
}
prop.put("cluster.peers.yacydomain.hashes", hashes.toString());
// set p2p mode flags
prop.put("privatepeerChecked", ("privatepeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("privateclusterChecked", ("privatecluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("publicclusterChecked", ("publiccluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("publicpeerChecked", ("publicpeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put(
"privatepeerChecked",
(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
prop.put(
"publicclusterChecked",
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
prop.put(
"publicpeerChecked",
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
// set network configuration
prop.putHTML("network.unit.definition", sb.getConfig("network.unit.definition", ""));
@ -193,7 +233,7 @@ public class ConfigNetwork_p {
prop.putHTML("network.unit.dht", sb.getConfig("network.unit.dht", ""));
networkBootstrapLocations.remove(sb.getConfig("network.unit.definition", ""));
int c = 0;
for (final String s: networkBootstrapLocations) {
for ( final String s : networkBootstrapLocations ) {
prop.put("networks_" + c++ + "_network", s);
}
prop.put("networks", c);
@ -205,10 +245,10 @@ public class ConfigNetwork_p {
input = input.replace(' ', ',');
input = input.replace(' ', ';');
input = input.replaceAll(",,", ",");
if (input.length() > 0 && input.charAt(0) == ',') {
if ( input.length() > 0 && input.charAt(0) == ',' ) {
input = input.substring(1);
}
if (input.endsWith(",")) {
if ( input.endsWith(",") ) {
input = input.substring(0, input.length() - 1);
}
return input;
@ -217,14 +257,16 @@ public class ConfigNetwork_p {
private static String checkYaCyDomainList(final String input) {
final String[] array = normalizedList(input).split(",");
final StringBuilder output = new StringBuilder();
for (final String element : array) {
if ((element.endsWith(".yacyh")) || (element.endsWith(".yacy")) ||
(element.indexOf(".yacyh=",0) > 0) || (element.indexOf(".yacy=",0) > 0)) {
for ( final String element : array ) {
if ( (element.endsWith(".yacyh"))
|| (element.endsWith(".yacy"))
|| (element.indexOf(".yacyh=", 0) > 0)
|| (element.indexOf(".yacy=", 0) > 0) ) {
output.append(",").append(element);
}
}
if (output.length() == 0) {
if ( output.length() == 0 ) {
return input;
}
return output.delete(0, 1).toString();
@ -233,12 +275,12 @@ public class ConfigNetwork_p {
private static String checkIPPortList(final String input) {
final String[] array = normalizedList(input).split(",");
final StringBuilder output = new StringBuilder();
for (final String element :array) {
if (element.indexOf(':',0) >= 9) {
for ( final String element : array ) {
if ( element.indexOf(':', 0) >= 9 ) {
output.append(",").append(element);
}
}
if (input.length() == 0) {
if ( input.length() == 0 ) {
return input;
}
return output.delete(0, 1).toString();

0
htroot/IndexCleaner_p.html Executable file → Normal file
View File

0
htroot/IndexCleaner_p.java Executable file → Normal file
View File

View File

@ -69,6 +69,12 @@
<dd class="hint">You have not published your peer seed yet. This happens automatically, just wait.
</dd>
#(/urgentStatusVirgin)#
#(hintStatusPrivate)#::
<dt class="hintIcon"><img src="env/grafics/bad.png" width="32" height="32" alt="idea"/></dt>
<dd class="hint">Your network configuration is in private mode. Your peer seed will not be published.
</dd>
#(/hintStatusPrivate)#
<!-- warnings -->

View File

@ -46,67 +46,73 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Status {
public class Status
{
private static final String SEEDSERVER = "seedServer";
private static final String PEERSTATUS = "peerStatus";
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
// check if the basic configuration was accessed before and forward
prop.put("forwardToConfigBasic", 0);
if ((post == null || !post.containsKey("noforward")) &&
sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html",0) < 0 &&
Seed.isDefaultPeerName(sb.peers.mySeed().getName())) {
if ( (post == null || !post.containsKey("noforward"))
&& sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html", 0) < 0
&& Seed.isDefaultPeerName(sb.peers.mySeed().getName()) ) {
// forward to ConfigBasic
prop.put("forwardToConfigBasic", 1);
}
if (post != null) post.remove("noforward");
if ( post != null ) {
post.remove("noforward");
}
if (post != null && post.size() > 0) {
if (sb.adminAuthenticated(header) < 2) {
prop.put("AUTHENTICATE","admin log-in");
if ( post != null && post.size() > 0 ) {
if ( sb.adminAuthenticated(header) < 2 ) {
prop.put("AUTHENTICATE", "admin log-in");
return prop;
}
boolean redirect = false;
if (post.containsKey("login")) {
prop.put("LOCATION","");
if ( post.containsKey("login") ) {
prop.put("LOCATION", "");
return prop;
} else if (post.containsKey("pauseCrawlJob")) {
} else if ( post.containsKey("pauseCrawlJob") ) {
final String jobType = post.get("jobType");
if ("localCrawl".equals(jobType)) {
if ( "localCrawl".equals(jobType) ) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
} else if ("remoteTriggeredCrawl".equals(jobType)) {
} else if ( "remoteTriggeredCrawl".equals(jobType) ) {
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
}
redirect = true;
} else if (post.containsKey("continueCrawlJob")) {
} else if ( post.containsKey("continueCrawlJob") ) {
final String jobType = post.get("jobType");
if ("localCrawl".equals(jobType)) {
if ( "localCrawl".equals(jobType) ) {
sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
} else if ("remoteTriggeredCrawl".equals(jobType)) {
} else if ( "remoteTriggeredCrawl".equals(jobType) ) {
sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
}
redirect = true;
} else if (post.containsKey("ResetTraffic")) {
} else if ( post.containsKey("ResetTraffic") ) {
ByteCount.resetCount();
redirect = true;
} else if (post.containsKey("popup")) {
} else if ( post.containsKey("popup") ) {
final boolean trigger_enabled = post.getBoolean("popup", false);
sb.setConfig("browserPopUpTrigger", trigger_enabled);
redirect = true;
} else if (post.containsKey("tray")) {
} else if ( post.containsKey("tray") ) {
final boolean trigger_enabled = post.getBoolean("tray", false);
sb.setConfig("trayIcon", trigger_enabled);
redirect = true;
}
if (redirect) {
prop.put("LOCATION","");
return prop;
if ( redirect ) {
prop.put("LOCATION", "");
return prop;
}
}
@ -114,7 +120,7 @@ public class Status {
sb.updateMySeed();
final boolean adminaccess = sb.adminAuthenticated(header) >= 2;
if (adminaccess) {
if ( adminaccess ) {
prop.put("showPrivateTable", "1");
prop.put("privateStatusTable", "Status_p.inc");
} else {
@ -123,26 +129,28 @@ public class Status {
}
// password protection
if ((sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) {
if ( (sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0)
&& (!sb.getConfigBool("adminAccountForLocalhost", false)) ) {
prop.put("protection", "0"); // not protected
prop.put("urgentSetPassword", "1");
} else {
prop.put("protection", "1"); // protected
}
if (sb.getConfigBool("adminAccountForLocalhost", false)) {
if ( sb.getConfigBool("adminAccountForLocalhost", false) ) {
prop.put("unrestrictedLocalAccess", 1);
}
// resource observer status
if (adminaccess) {
if (!sb.observer.getDiskAvailable()){
if ( adminaccess ) {
if ( !sb.observer.getDiskAvailable() ) {
final String minFree = Formatter.bytesToString(sb.observer.getMinFreeDiskSpace());
prop.put("warningDiskSpaceLow", "1");
prop.put("warningDiskSpaceLow_minSpace", minFree);
}
if (!sb.observer.getMemoryAvailable()){
final String minFree = Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
if ( !sb.observer.getMemoryAvailable() ) {
final String minFree =
Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
prop.put("warningMemoryLow", "1");
prop.put("warningMemoryLow_minSpace", minFree);
}
@ -151,25 +159,26 @@ public class Status {
// version information
//final String versionstring = yacyVersion.combined2prettyVersion(sb.getConfig("version","0.1"));
final String versionstring = yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
final String versionstring =
yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
prop.put("versionpp", versionstring);
// place some more hints
if ((adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() == 0)) {
if ( (adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() == 0) ) {
prop.put("hintCrawlStart", "1");
}
if ((adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() > 500)) {
if ( (adminaccess) && (sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount() > 500) ) {
prop.put("hintCrawlMonitor", "1");
}
// hostname and port
final String extendedPortString = sb.getConfig("port", "8090");
final int pos = extendedPortString.indexOf(':',0);
prop.put("port",serverCore.getPortNr(extendedPortString));
if (pos != -1) {
final int pos = extendedPortString.indexOf(':', 0);
prop.put("port", serverCore.getPortNr(extendedPortString));
if ( pos != -1 ) {
prop.put("extPortFormat", "1");
prop.putHTML("extPortFormat_extPort",extendedPortString);
prop.putHTML("extPortFormat_extPort", extendedPortString);
} else {
prop.put("extPortFormat", "0");
}
@ -177,9 +186,9 @@ public class Status {
prop.put("host", hostIP != null ? hostIP.getHostAddress() : "Unkown IP");
// ssl support
prop.put("sslSupport",sb.getConfig("keyStore", "").length() == 0 ? "0" : "1");
prop.put("sslSupport", sb.getConfig("keyStore", "").length() == 0 ? "0" : "1");
if (sb.getConfigBool("remoteProxyUse", false)) {
if ( sb.getConfigBool("remoteProxyUse", false) ) {
prop.put("remoteProxy", "1");
prop.putXML("remoteProxy_host", sb.getConfig("remoteProxyHost", "<unknown>"));
prop.putXML("remoteProxy_port", sb.getConfig("remoteProxyPort", "<unknown>"));
@ -191,22 +200,24 @@ public class Status {
// peer information
String thisHash = "";
final String thisName = sb.peers.mySeed().getName();
if (sb.peers.mySeed() == null) {
if ( sb.peers.mySeed() == null ) {
thisHash = "not assigned";
prop.put("peerAddress", "0"); // not assigned
prop.put("peerAddress", "0"); // not assigned
prop.put("peerStatistics", "0"); // unknown
} else {
final long uptime = 60000 * sb.peers.mySeed().getLong(Seed.UPTIME, 0L);
prop.put("peerStatistics", "1");
prop.put("peerStatistics_uptime", PeerActions.formatInterval(uptime));
prop.putNum("peerStatistics_pagesperminute", sb.peers.mySeed().getPPM());
prop.putNum("peerStatistics_queriesperhour", Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
prop.putNum(
"peerStatistics_queriesperhour",
Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
prop.putNum("peerStatistics_links", sb.peers.mySeed().getLinkCount());
prop.put("peerStatistics_words", Formatter.number(sb.peers.mySeed().getWordCount()));
prop.putNum("peerStatistics_disconnects", sb.peers.peerActions.disconnects);
prop.put("peerStatistics_connects", Formatter.number(sb.peers.mySeed().get(Seed.CCOUNT, "0")));
thisHash = sb.peers.mySeed().hash;
if (sb.peers.mySeed().getPublicAddress() == null) {
if ( sb.peers.mySeed().getPublicAddress() == null ) {
prop.put("peerAddress", "0"); // not assigned + instructions
prop.put("warningGoOnline", "1");
} else {
@ -215,17 +226,25 @@ public class Status {
prop.putXML("peerAddress_peername", sb.peers.mySeed().getName().toLowerCase());
}
}
final String peerStatus = ((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN));
if (Seed.PEERTYPE_VIRGIN.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
final String peerStatus =
((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(
Seed.PEERTYPE,
Seed.PEERTYPE_VIRGIN));
if ( Seed.PEERTYPE_VIRGIN.equals(peerStatus)
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
prop.put(PEERSTATUS, "0");
prop.put("urgentStatusVirgin", "1");
} else if (Seed.PEERTYPE_JUNIOR.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
} else if ( Seed.PEERTYPE_JUNIOR.equals(peerStatus)
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
prop.put(PEERSTATUS, "1");
prop.put("warningStatusJunior", "1");
} else if (Seed.PEERTYPE_SENIOR.equals(peerStatus)) {
} else if ( Seed.PEERTYPE_SENIOR.equals(peerStatus) ) {
prop.put(PEERSTATUS, "2");
prop.put("hintStatusSenior", "1");
} else if (Seed.PEERTYPE_PRINCIPAL.equals(peerStatus)) {
} else if ( Seed.PEERTYPE_PRINCIPAL.equals(peerStatus) ) {
prop.put(PEERSTATUS, "3");
prop.put("hintStatusPrincipal", "1");
prop.put("hintStatusPrincipal_seedURL", sb.peers.mySeed().get(Seed.SEEDLISTURL, "?"));
@ -234,50 +253,52 @@ public class Status {
prop.put("hash", thisHash);
final String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
if (!"none".equalsIgnoreCase(seedUploadMethod) ||
("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 ||
sb.getConfig("seedFilePath", "").length() > 0))) {
if ("".equals(seedUploadMethod)) {
if (sb.getConfig("seedFTPPassword", "").length() > 0) {
sb.setConfig("seedUploadMethod","Ftp");
if ( !"none".equalsIgnoreCase(seedUploadMethod)
|| ("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 || sb
.getConfig("seedFilePath", "")
.length() > 0)) ) {
if ( "".equals(seedUploadMethod) ) {
if ( sb.getConfig("seedFTPPassword", "").length() > 0 ) {
sb.setConfig("seedUploadMethod", "Ftp");
}
if (sb.getConfig("seedFilePath", "").length() > 0) {
sb.setConfig("seedUploadMethod","File");
if ( sb.getConfig("seedFilePath", "").length() > 0 ) {
sb.setConfig("seedUploadMethod", "File");
}
}
if ("ftp".equalsIgnoreCase(seedUploadMethod)) {
if ( "ftp".equalsIgnoreCase(seedUploadMethod) ) {
prop.put(SEEDSERVER, "1"); // enabled
prop.putHTML("seedServer_seedServer", sb.getConfig("seedFTPServer", ""));
} else if ("scp".equalsIgnoreCase(seedUploadMethod)) {
} else if ( "scp".equalsIgnoreCase(seedUploadMethod) ) {
prop.put(SEEDSERVER, "1"); // enabled
prop.putHTML("seedServer_seedServer", sb.getConfig("seedScpServer", ""));
} else if ("file".equalsIgnoreCase(seedUploadMethod)) {
} else if ( "file".equalsIgnoreCase(seedUploadMethod) ) {
prop.put(SEEDSERVER, "2"); // enabled
prop.putHTML("seedServer_seedFile", sb.getConfig("seedFilePath", ""));
}
prop.put("seedServer_lastUpload",
PeerActions.formatInterval(System.currentTimeMillis() - sb.peers.lastSeedUpload_timeStamp));
prop.put(
"seedServer_lastUpload",
PeerActions.formatInterval(System.currentTimeMillis() - sb.peers.lastSeedUpload_timeStamp));
} else {
prop.put(SEEDSERVER, "0"); // disabled
}
if (sb.peers != null && sb.peers.sizeConnected() > 0){
if ( sb.peers != null && sb.peers.sizeConnected() > 0 ) {
prop.put("otherPeers", "1");
prop.putNum("otherPeers_num", sb.peers.sizeConnected());
}else{
} else {
prop.put("otherPeers", "0"); // not online
}
if (!sb.getConfigBool("browserPopUpTrigger", false)) {
if ( !sb.getConfigBool("browserPopUpTrigger", false) ) {
prop.put("popup", "0");
} else {
prop.put("popup", "1");
}
if (!OS.isWindows) {
prop.put("tray", "2");
} else if (!sb.getConfigBool("trayIcon", false)) {
if ( !OS.isWindows ) {
prop.put("tray", "2");
} else if ( !sb.getConfigBool("trayIcon", false) ) {
prop.put("tray", "0");
} else {
prop.put("tray", "1");
@ -305,18 +326,26 @@ public class Status {
final int loaderPercent = (loaderMaxCount == 0) ? 0 : loaderJobCount * 100 / loaderMaxCount;
prop.putNum("loaderQueueSize", loaderJobCount);
prop.putNum("loaderQueueMax", loaderMaxCount);
prop.put("loaderQueuePercent", (loaderPercent>100) ? 100 : loaderPercent);
prop.put("loaderQueuePercent", (loaderPercent > 100) ? 100 : loaderPercent);
prop.putNum("localCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
prop.put("localCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? "1" : "0");
prop.putNum("localCrawlQueueSize", sb
.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
.getJobCount());
prop.put("localCrawlPaused", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
? "1"
: "0");
prop.putNum("remoteTriggeredCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
prop.put("remoteTriggeredCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
prop.putNum(
"remoteTriggeredCrawlQueueSize",
sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
prop.put(
"remoteTriggeredCrawlPaused",
sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
prop.putNum("stackCrawlQueueSize", sb.crawlStacker.size());
// return rewrite properties
prop.put("date",(new Date()).toString());
prop.put("date", (new Date()).toString());
return prop;
}
}

0
htroot/api/bookmarks/posts/add_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/all.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/delete_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/get.java Executable file → Normal file
View File

0
htroot/api/bookmarks/tags/editTag_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/tags/getTag.java Executable file → Normal file
View File

0
htroot/api/bookmarks/xbel/xbel.java Executable file → Normal file
View File

0
htroot/api/feed.java Executable file → Normal file
View File

0
htroot/api/getpageinfo_p.java Executable file → Normal file
View File

0
htroot/api/ynetSearch.java Executable file → Normal file
View File

0
htroot/compare_yacy.html Executable file → Normal file
View File

0
htroot/compare_yacy.java Executable file → Normal file
View File

0
htroot/processing/domaingraph/applet/domaingraph.java Executable file → Normal file
View File

0
htroot/processing/domaingraph/applet/index.html Executable file → Normal file
View File

0
htroot/rssTerminal.html Executable file → Normal file
View File

0
htroot/terminal_p.html Executable file → Normal file
View File

File diff suppressed because it is too large Load Diff

319
skins/28c3.css Normal file
View File

@ -0,0 +1,319 @@
/* generic skin */
/*
The following colours must be defined:
#000000
#A3CC8B
#38A535
#E08040
#333333
#222222
#FFCCCC
#888888
#990000
#009900
#000099
#FFFFFF
#008000
#800000
*/
body {
background-color:#000000;
color:#A3CC8B;
}
a:link {
color:#A3CC8B;
background-color:transparent;
}
a:link:hover {
color: #38A535;
background-color:transparent;
}
/* Menu */
.menugroup h3 {
-webkit-border-top-left-radius: 5px;
-webkit-border-top-right-radius: 5px;
-khtml-border-top-left-radius: 5px;
-khtml-border-top-right-radius: 5px;
-moz-border-radius-topleft: 5px;
-moz-border-radius-topright: 5px;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
background-color: #E08040;
color:white;
}
.SubMenugroup h3, .SubMenu h3 {
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
background-color:#E08040;
color:white;
}
a.MenuItemLink, ul.SubMenu em {
background-color:#333333;
color:#A3CC8B;
}
a:hover.MenuItemLink {
background-color:transparent;
color:#A3CC8B;
}
/* Head */
div.head h1 {
background-color:transparent;
text-align:left;
padding-left:70px;
color:#A3CC8B;
}
/* Tables */
table {
}
.TableHeader {
background-color: #38A535;
color:white;
}
.TableCellDark {
background-color: #333333;
}
.TableCellLight {
background-color: #222222;
}
.TableCellSummary {
background-color: #FFCCCC;
border:1px solid #888888;
}
.TableCellActive {
background-color: #FFCCCC;
}
/* Blog and Wiki */
.Post {
background-color:#000000;
}
.PostSubject {
background-color:#000000;
}
.PostSubject a {
color:#A3CC8B;
}
.PostInfo {
background-color:#000000;
}
/* Wiki */
.WikiTOCBox {
border: 1px solid #888888;
background-color: #000000;
}
a.unknown {
color:#990000;
}
a.known {
color:#009900;
}
a.extern {
color:#000099;
}
/* in Bookmarks */
.bookmark {
border-bottom:1px #888888 dashed;
}
a.bookmarkTitle {
color: #E08040;
}
a:hover.bookmarkTitle {
color: #E08040;
}
a.bookmarkTags {
color: #FFFFFF;
}
a:hover.bookmarkTags {
color: #E08040;
}
a.bookmarkAction {
color: #888888;
}
.Tags {
border-left: 2px solid #A3CC8B;
}
.diff { background-color: #000000; }
.diff .unchanged { color: #000099; }
.diff .added { color: #009900; background-color: #000000; }
.diff .deleted { color: #990000; background-color: #000000; }
/* in Status.html */
.ProgressBar {
border: #000000 solid 1px;
}
div.ProgressBarFill {
background-color:#333333;
}
/* Copyright info */
div#api {
position:absolute;
top:3px;
right:20px;
z-index: 100;
}
div#yacylivesearch {
float:right;
margin-right: 90px;
margin-top: -24px;
}
/* Searchresults */
fieldset.maininput, fieldset.yacys {
background-color:#333333;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
}
form.search.small h2 {
border-bottom:1px solid #888888;
}
.searchresults {
/*border-top:1px #888888 dashed;*/
}
.searchresults h4 a {
font-size:1.2em;
font-weight:normal;
text-decoration:underline;
color:#FFFFFF;
}
.searchresults h4 a:link:hover {
color:#FFFFFF;
}
.snippetLoaded strong {
color:black;
}
.searchresults .url a {
color:#008000;
}
.searchresults .url a:link:hover {
color:#800000;
}
/* other */
.settingsValue {
color:#000099;
}
.Headline {
background-color: #E08040;
color: white;
}
.Heading {
background-color: #E08040;
}
.error, .warning {
color:red;
}
.success {
color:green;
}
.Message {
background-color: #000000;
}
.example {
background-color:#000099;
}
.hides:hover .hoverShow { background-color: #000000; }
/* Log */
body#ViewLog pre {
background-color:white;
}
/* Forms */
fieldset {
background-color:#333333;
color:#A3CC8B;
border:0px solid #333333;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
}
/* input, select, textarea, button {
color: black;
background-color: white;
} */
legend {
background-color:#38A535;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
text-align:left;
color:white;
}
form dt, dl.pairs dt {
background-color:#333333;
font-weight:bold;
}
form dd, dl.pairs dd {
background-color:#333333;
}

View File

@ -142,11 +142,19 @@ public class CrawlQueues {
* @return if the hash exists, the name of the database is returned, otherwise null is returned
*/
public String urlExists(final byte[] hash) {
    // the URL databases are probed in a fixed order; the first one that knows the hash wins
    if (this.delegatedURL.exists(hash)) {
        return "delegated";
    } else if (this.errorURL.exists(hash)) {
        return "errors";
    } else if (this.noticeURL.existsInStack(hash)) {
        return "crawler";
    }

    // not in any database: compare against the request held by each loader worker
    for (final Loader loader : this.workers.values()) {
        final boolean sameHash = Base64Order.enhancedCoder.equal(loader.request.url().hash(), hash);
        if (sameHash) {
            return "worker";
        }
    }

    // the hash is unknown everywhere
    return null;
}
@ -159,16 +167,26 @@ public class CrawlQueues {
public DigestURI getURL(final byte[] urlhash) {
    assert urlhash != null;
    if (urlhash == null || urlhash.length == 0) {
        return null;
    }

    // 1. URLs that were delegated
    final ZURL.Entry delegated = this.delegatedURL.get(urlhash);
    if (delegated != null) {
        return delegated.url();
    }

    // 2. URLs that ended in the error database
    final ZURL.Entry failed = this.errorURL.get(urlhash);
    if (failed != null) {
        return failed.url();
    }

    // 3. URLs held by the loader workers
    for (final Loader loader : this.workers.values()) {
        if (Base64Order.enhancedCoder.equal(loader.request.url().hash(), urlhash)) {
            return loader.request.url();
        }
    }

    // 4. URLs that are noticed in the crawl stacks; null if the hash is unknown there as well
    final Request noticed = this.noticeURL.get(urlhash);
    return (noticed == null) ? null : noticed.url();
}
@ -176,7 +194,9 @@ public class CrawlQueues {
// wait for all workers to finish
final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
for (final Loader w: this.workers.values()) {
if (w.age() > timeout) w.interrupt();
if (w.age() > timeout) {
w.interrupt();
}
}
}
@ -185,7 +205,9 @@ public class CrawlQueues {
final Request[] e = new Request[this.workers.size()];
int i = 0;
for (final Loader w: this.workers.values()) {
if (i >= e.length) break;
if (i >= e.length) {
break;
}
e[i++] = w.request;
}
return e;
@ -195,14 +217,12 @@ public class CrawlQueues {
public int coreCrawlJobSize() {
    // the core crawl job works on two stacks: CORE and NOLOAD
    final int coreEntries = this.noticeURL.stackSize(NoticedURL.StackType.CORE);
    final int noloadEntries = this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD);
    return coreEntries + noloadEntries;
}
public boolean coreCrawlJob() {
public boolean coreCrawlJob() {
final boolean robinsonPrivateCase = (this.sb.isRobinsonMode() &&
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER) &&
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_CLUSTER));
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER));
if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) {
if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) {
// move some tasks to the core crawl job so we have something to do
final int toshift = Math.min(10, limitCrawlJobSize()); // this cannot be a big number because the balancer makes a forced waiting if it cannot balance
for (int i = 0; i < toshift; i++) {
@ -216,12 +236,16 @@ public class CrawlQueues {
final String queueCheckCore = loadIsPossible(NoticedURL.StackType.CORE);
final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD);
if (queueCheckCore != null && queueCheckNoload != null) {
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
if (this.log.isFine()) {
this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
}
return false;
}
if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: paused");
if (this.log.isFine()) {
this.log.logFine("omitting de-queue/local: paused");
}
return false;
}
@ -238,7 +262,9 @@ public class CrawlQueues {
if (this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
// get one entry that will not be loaded, just indexed
urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
if (urlEntry == null) continue;
if (urlEntry == null) {
continue;
}
final String profileHandle = urlEntry.profileHandle();
if (profileHandle == null) {
this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
@ -259,7 +285,9 @@ public class CrawlQueues {
}
urlEntry = this.noticeURL.pop(NoticedURL.StackType.CORE, true, this.sb.crawler);
if (urlEntry == null) continue;
if (urlEntry == null) {
continue;
}
final String profileHandle = urlEntry.profileHandle();
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
@ -271,7 +299,9 @@ public class CrawlQueues {
return true;
} catch (final IOException e) {
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.CORE);
if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.CORE);
}
}
}
return true;
@ -293,7 +323,7 @@ public class CrawlQueues {
final DigestURI url = urlEntry.url();
final String urlProtocol = url.getProtocol();
if (this.sb.loader.isSupportedProtocol(urlProtocol)) {
if (this.log.isFine())
if (this.log.isFine()) {
this.log.logFine(stats + ": URL=" + urlEntry.url()
+ ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator()))
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
@ -302,6 +332,7 @@ public class CrawlQueues {
+ ", must-match=" + profile.urlMustMatchPattern().toString()
+ ", must-not-match=" + profile.urlMustNotMatchPattern().toString()
+ ", permission=" + ((this.sb.peers == null) ? "undefined" : (((this.sb.peers.mySeed().isSenior()) || (this.sb.peers.mySeed().isPrincipal())) ? "true" : "false")));
}
// work off one Crawl stack entry
if (urlEntry == null || urlEntry.url() == null) {
@ -387,23 +418,31 @@ public class CrawlQueues {
}
// check again
if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
}
return false;
}
final String cautionCause = this.sb.onlineCaution();
if (cautionCause != null) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
}
return false;
}
if (remoteTriggeredCrawlJobSize() > 200) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
}
return false;
}
if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
}
return false;
}
@ -414,27 +453,37 @@ public class CrawlQueues {
final Iterator<Seed> e = PeerSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
while (e.hasNext()) {
seed = e.next();
if (seed != null) this.remoteCrawlProviderHashes.add(seed.hash);
if (seed != null) {
this.remoteCrawlProviderHashes.add(seed.hash);
}
}
}
}
if (this.remoteCrawlProviderHashes.isEmpty()) return false;
if (this.remoteCrawlProviderHashes.isEmpty()) {
return false;
}
// take one entry from the provider list and load the entries from the remote peer
seed = null;
String hash = null;
while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
if (hash == null) continue;
if (hash == null) {
continue;
}
seed = this.sb.peers.get(hash);
if (seed == null) continue;
if (seed == null) {
continue;
}
// check if the peer is inside our cluster
if ((this.sb.isRobinsonMode()) && (!this.sb.isInMyCluster(seed))) {
seed = null;
continue;
}
}
if (seed == null) return false;
if (seed == null) {
return false;
}
// we know a peer which should provide remote crawl entries. load them now.
final RSSFeed feed = Protocol.queryRemoteCrawlURLs(this.sb.peers, seed, 60, 8000);
@ -467,7 +516,9 @@ public class CrawlQueues {
final String urlRejectReason = this.sb.crawlStacker.urlInAcceptedDomain(url);
if (urlRejectReason == null) {
// stack url
if (this.sb.getLog().isFinest()) this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
if (this.sb.getLog().isFinest()) {
this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
}
this.sb.crawlStacker.enqueueEntry(new Request(
ASCII.getBytes(hash),
url,
@ -514,12 +565,16 @@ public class CrawlQueues {
// or there is no global crawl on the stack
final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE);
if (queueCheck != null) {
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: " + queueCheck);
if (this.log.isFinest()) {
this.log.logFinest("omitting de-queue/remote: " + queueCheck);
}
return false;
}
if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: paused");
if (this.log.isFinest()) {
this.log.logFinest("omitting de-queue/remote: paused");
}
return false;
}
@ -536,7 +591,9 @@ public class CrawlQueues {
return true;
} catch (final IOException e) {
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.REMOTE);
if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.REMOTE);
}
return true;
}
}
@ -603,7 +660,9 @@ public class CrawlQueues {
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), true);
if (response == null) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
if (CrawlQueues.this.log.isFine()) {
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
}
result = "no content (possibly caused by cache policy)";
} else {
this.request.setStatus("loaded", WorkflowJob.STATUS_RUNNING);
@ -613,7 +672,9 @@ public class CrawlQueues {
}
} catch (final IOException e) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
if (CrawlQueues.this.log.isFine()) {
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
}
result = "load error - " + e.getMessage();
}
@ -644,9 +705,7 @@ public class CrawlQueues {
} finally {
final Loader w = CrawlQueues.this.workers.remove(this.code);
assert w != null;
}
}
}
}
}
}
}

0
source/de/anomic/crawler/NoticedURL.java Executable file → Normal file
View File

0
source/de/anomic/crawler/ResultImages.java Executable file → Normal file
View File

0
source/de/anomic/crawler/ZURL.java Executable file → Normal file
View File

0
source/de/anomic/crawler/retrieval/Request.java Executable file → Normal file
View File

0
source/de/anomic/crawler/retrieval/Response.java Executable file → Normal file
View File

View File

@ -48,14 +48,17 @@ public class DidYouMean {
// Experimental alphabet made of two blocks of 256 characters each:
// slots 0..255 hold U+3400..U+34FF, slots 256..511 hold U+4E00..U+4EFF.
private static final char[] ALPHABET_KANJI = new char[512];
static {
    // this is very experimental: a very small subset of Kanji
    for (char a = '\u3400'; a <= '\u34ff'; a++) {
        ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
    }
    for (char a = '\u4e00'; a <= '\u4eff'; a++) {
        // The parentheses around the mask are required: '+' binds tighter than '&',
        // so without them the offset 256 is masked away again and this loop would
        // overwrite slots 0..255 instead of filling slots 256..511.
        ALPHABET_KANJI[(0xff & (a - '\u4e00')) + 256] = a;
    }
}
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static char[] alphabet = ALPHABET_LATIN;
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
private final IndexCell<WordReference> index;
@ -66,6 +69,7 @@ public class DidYouMean {
private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written
private final SortedSet<StringBuilder> resultSet;
private final indexSizeComparator INDEX_SIZE_COMPARATOR;
private char[] alphabet;
/**
@ -88,25 +92,31 @@ public class DidYouMean {
// select the alphabet that contains the test character
boolean alphafound = false;
alphatest: for (final char[] alpha: ALPHABETS) {
    if (isAlphabet(alpha, testchar)) {
        // Keep a private copy of the alphabet that matched. The copy must be taken
        // from 'alpha' itself: copying from ALPHABET_LATIN would select the wrong
        // letters for every other alphabet and overrun the source array when the
        // matched alphabet is longer than the Latin one.
        this.alphabet = new char[alpha.length];
        System.arraycopy(alpha, 0, this.alphabet, 0, alpha.length);
        alphafound = true;
        break alphatest;
    }
}
if (!alphafound) {
    // generate generic alphabet using simply a character block of 256 characters
    // The bounds are ints on purpose: for the last block (0xFF00..0xFFFF) a char
    // counter would wrap around to 0 after 0xFFFF and the loop would never end.
    final int firstchar = (0xff & (testchar / 256)) * 256;
    final int lastchar = firstchar + 255;
    this.alphabet = new char[256];
    // test this with /suggest.json?q=%EF%BD%84
    for (int a = firstchar; a <= lastchar; a++) {
        this.alphabet[0xff & (a - firstchar)] = (char) a;
    }
}
}
}
// Tells whether testchar is one of the characters listed in alpha (linear scan).
private static final boolean isAlphabet(final char[] alpha, final char testchar) {
    int pos = 0;
    while (pos < alpha.length) {
        if (alpha[pos] == testchar) {
            return true;
        }
        pos++;
    }
    return false;
}
@ -125,10 +135,15 @@ public class DidYouMean {
* @return
*/
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short
if (this.word.length() < MinimumInputWordLength)
{
return this.resultSet; // return nothing if input is too short
}
final long startTime = System.currentTimeMillis();
final long timelimit = startTime + timeout;
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) {
return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
}
final SortedSet<StringBuilder> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
@ -138,8 +153,12 @@ public class DidYouMean {
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
for (final StringBuilder s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
if (System.currentTimeMillis() > timelimit) {
break;
}
if (!(scored.sizeSmaller(2 * preSortSelection))) {
break;
}
scored.inc(s, this.index.count(Word.word2hash(s)));
}
final SortedSet<StringBuilder> countSorted = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
@ -147,8 +166,12 @@ public class DidYouMean {
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
final StringBuilder s = scored.getMaxKey();
final int score = scored.delete(s);
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);
if (System.currentTimeMillis() > timelimit) break;
if (s.length() >= MinimumOutputWordLength && score > wc) {
countSorted.add(s);
}
if (System.currentTimeMillis() > timelimit) {
break;
}
}
// finished
@ -180,11 +203,19 @@ public class DidYouMean {
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
StringBuilder sb;
for (int i = 0; i < words.length; i++) {
if (s[i].isEmpty()) continue;
if (s[i].isEmpty()) {
continue;
}
sb = new StringBuilder(20);
for (int j = 0; j < words.length; j++) {
if (j > 0) sb.append(' ');
if (i == j) sb.append(s[j].first()); else sb.append(words[j]);
if (j > 0) {
sb.append(' ');
}
if (i == j) {
sb.append(s[j].first());
} else {
sb.append(words[j]);
}
}
result.add(sb);
}
@ -211,10 +242,12 @@ public class DidYouMean {
// get a single recommendation for the word without altering the word
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(this.word);
for (final StringBuilder t: libr) {
if (!t.equals(this.word)) try {
this.createGen = false;
this.guessLib.put(t);
} catch (final InterruptedException e) {}
if (!t.equals(this.word)) {
try {
this.createGen = false;
this.guessLib.put(t);
} catch (final InterruptedException e) {}
}
}
// create and start producers
@ -226,34 +259,46 @@ public class DidYouMean {
producers[1] = new AddingOneLetter();
producers[2] = new DeletingOneLetter();
producers[3] = new ReversingTwoConsecutiveLetters();
for (final Thread t: producers) t.start();
for (final Thread t: producers) {
t.start();
}
// start more consumers if there are more cores
if (consumers.length > 1) for (int i = 1; i < consumers.length; i++) {
consumers[i] = new Consumer();
consumers[i].start();
if (consumers.length > 1) {
for (int i = 1; i < consumers.length; i++) {
consumers[i] = new Consumer();
consumers[i].start();
}
}
// now decide which kind of guess is better
// we take guessLib entries as long as there is any entry in it
// to see if this is the case, we must wait for termination of the producer
for (final Thread t: producers) try { t.join(); } catch (final InterruptedException e) {}
for (final Thread t: producers) {
try { t.join(); } catch (final InterruptedException e) {}
}
// if there is not any entry in guessLib, then transfer all entries from the
// guessGen to guessLib
if (this.createGen) try {
this.guessGen.put(POISON_STRING);
StringBuilder s;
while (!(s = this.guessGen.take()).equals(POISON_STRING)) this.guessLib.put(s);
} catch (final InterruptedException e) {}
if (this.createGen) {
try {
this.guessGen.put(POISON_STRING);
StringBuilder s;
while (!(s = this.guessGen.take()).equals(POISON_STRING)) {
this.guessLib.put(s);
}
} catch (final InterruptedException e) {}
}
// put poison into guessLib to terminate consumers
for (@SuppressWarnings("unused") final Consumer c: consumers)
for (@SuppressWarnings("unused") final Consumer c: consumers) {
try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {}
}
// wait for termination of consumer
for (final Consumer c: consumers)
for (final Consumer c: consumers) {
try { c.join(); } catch (final InterruptedException e) {}
}
// we don't want the given word in the result
this.resultSet.remove(this.word);
@ -265,7 +310,9 @@ public class DidYouMean {
private void test(final StringBuilder s) throws InterruptedException {
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) this.createGen = false;
if (!libr.isEmpty()) {
this.createGen = false;
}
for (final StringBuilder t: libr) {
this.guessLib.put(t);
}
@ -284,16 +331,20 @@ public class DidYouMean {
@Override
public void run() {
char m;
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
m = DidYouMean.this.word.charAt(i);
for (final char c: alphabet) {
if (m != c) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
test(ts);
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
try {
m = DidYouMean.this.word.charAt(i);
for (final char c: DidYouMean.this.alphabet) {
if (m != c) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
test(ts);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
}
} catch (final InterruptedException e) {}
} catch (final InterruptedException e) {}
}
}
}
@ -306,11 +357,15 @@ public class DidYouMean {
@Override
public void run() {
    // Removes the letter at every position of the word in turn and hands each
    // shortened variation to test().
    try {
        for (int i = 0; i < DidYouMean.this.wordLen; i++) {
            final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
            test(ts);
            // stop producing as soon as the time limit has passed
            if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
                return;
            }
        }
    } catch (final InterruptedException e) {
        // An interrupt ends this producer. The flag is restored instead of being
        // swallowed so that the interruption stays visible on this thread.
        Thread.currentThread().interrupt();
    }
}
}
@ -324,13 +379,17 @@ public class DidYouMean {
@Override
public void run() {
    // Inserts every letter of the alphabet at every position of the word
    // (including the position after the last letter, hence '<=') and hands each
    // variation to test().
    try {
        for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
            for (final char c: DidYouMean.this.alphabet) {
                final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
                test(ts);
                // stop producing as soon as the time limit has passed
                if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
                    return;
                }
            }
        }
    } catch (final InterruptedException e) {
        // An interrupt ends this producer. The flag is restored instead of being
        // swallowed so that the interruption stays visible on this thread.
        Thread.currentThread().interrupt();
    }
}
}
@ -343,11 +402,15 @@ public class DidYouMean {
@Override
public void run() {
    // Swaps every pair of neighbouring letters of the word in turn and hands each
    // variation to test().
    try {
        for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
            final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
            test(ts);
            // stop producing as soon as the time limit has passed
            if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
                return;
            }
        }
    } catch (final InterruptedException e) {
        // An interrupt ends this producer. The flag is restored instead of being
        // swallowed so that the interruption stays visible on this thread.
        Thread.currentThread().interrupt();
    }
}
}
@ -364,8 +427,12 @@ public class DidYouMean {
StringBuilder s;
try {
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) DidYouMean.this.resultSet.add(s);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) {
DidYouMean.this.resultSet.add(s);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
@ -377,10 +444,13 @@ public class DidYouMean {
*/
private class indexSizeComparator implements Comparator<StringBuilder> {
    @Override
    public int compare(final StringBuilder o1, final StringBuilder o2) {
        final int count1 = DidYouMean.this.index.count(Word.word2hash(o1));
        final int count2 = DidYouMean.this.index.count(Word.word2hash(o2));
        if (count1 != count2) {
            // '<' is correct, because the largest count shall be ordered to be the first position in the result
            return (count1 < count2) ? 1 : -1;
        }
        // same index count: fall back to the word length order
        return WORD_LENGTH_COMPARATOR.compare(o1, o2);
    }
}
@ -391,10 +461,13 @@ public class DidYouMean {
*/
private static class wordLengthComparator implements Comparator<StringBuilder> {
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
    final int length1 = o1.length();
    final int length2 = o2.length();
    if (length1 != length2) {
        // '<' is correct, because the longest word shall be first
        return (length1 < length2) ? 1 : -1;
    }
    // same length: fall back to the case-insensitive order
    return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
}
@ -411,10 +484,13 @@ public class DidYouMean {
this.secondaryComparator = secondaryComparator;
}
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
    final boolean firstMatches = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o1, this.head);
    final boolean secondMatches = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o2, this.head);
    if (firstMatches == secondMatches) {
        // both start with the head or neither does: the secondary order decides
        return this.secondaryComparator.compare(o1, o2);
    }
    // exactly one of them starts with the head; that one is ordered first
    return firstMatches ? -1 : 1;
}
}

0
source/net/yacy/ai/example/ConnectFour.java Executable file → Normal file
View File

0
source/net/yacy/ai/example/testorder.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/AbstractFinding.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/AbstractModel.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Agent.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Asset.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Attempts.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Battle.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Challenge.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Context.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Engine.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Finding.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Goal.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Model.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Role.java Executable file → Normal file
View File

0
source/net/yacy/cora/protocol/RequestHeader.java Executable file → Normal file
View File

0
source/net/yacy/cora/protocol/ResponseHeader.java Executable file → Normal file
View File

44
source/net/yacy/kelondro/blob/ArrayStack.java Executable file → Normal file
View File

@ -215,12 +215,14 @@ public class ArrayStack implements BLOB {
}
}
/**
 * Adds up the values reported by {@code mem()} of every blob in this stack.
 *
 * @return the accumulated value, or 0 if the blob list is {@code null}
 */
@Override
public long mem() {
    long total = 0;
    if (this.blobs == null) {
        // nothing to account for without a blob list
        return total;
    }
    for (final blobItem item : this.blobs) {
        total += item.blob.mem();
    }
    return total;
}
@Override
public void trim() {
// trim shall not be called for ArrayStacks because the characteristics of an ArrayStack is that the 'topmost' BLOB on the stack
// is used for write operations and all other shall be trimmed automatically since they are not used for writing. And the
@ -374,6 +376,7 @@ public class ArrayStack implements BLOB {
return new File(this.heapLocation, this.prefix + "." + my_SHORT_MILSEC_FORMATTER.format(creation) + ".blob");
}
/**
 * @return the name of this stack, taken from {@code heapLocation.getName()}
 */
@Override
public String name() {
    final String locationName = this.heapLocation.getName();
    return locationName;
}
@ -414,12 +417,14 @@ public class ArrayStack implements BLOB {
/**
 * Returns the size of the repository in bytes, computed as the sum of
 * {@code location.length()} over all blob items.
 *
 * @return the accumulated size in bytes
 */
@Override
public synchronized long length() {
    long totalBytes = 0;
    int idx = 0;
    // the list size is re-read on every pass, as in an index-based for loop
    while (idx < this.blobs.size()) {
        totalBytes += this.blobs.get(idx).location.length();
        idx++;
    }
    return totalBytes;
}
/**
 * @return the {@code ByteOrder} held in the {@code ordering} field of this stack
 */
@Override
public ByteOrder ordering() {
return this.ordering;
}
@ -446,6 +451,7 @@ public class ArrayStack implements BLOB {
* ask for the length of the primary key
* @return the length of the key
*/
@Override
public int keylength() {
return this.keylength;
}
@ -454,6 +460,7 @@ public class ArrayStack implements BLOB {
* clears the content of the database
* @throws IOException
*/
@Override
public synchronized void clear() throws IOException {
for (final blobItem bi: this.blobs) {
bi.blob.clear();
@ -467,12 +474,14 @@ public class ArrayStack implements BLOB {
* ask for the number of blob entries
* @return the number of entries in the table
*/
@Override
public synchronized int size() {
    // the stack has no counter of its own; every blob is asked for its size
    int entries = 0;
    for (final blobItem item : this.blobs) {
        entries += item.blob.size();
    }
    return entries;
}
@Override
public synchronized boolean isEmpty() {
for (final blobItem bi: this.blobs) if (!bi.blob.isEmpty()) return false;
return true;
@ -497,6 +506,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
assert rotating == false;
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
@ -514,6 +524,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
final Iterator<blobItem> i = this.blobs.iterator();
@ -529,6 +540,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized boolean containsKey(final byte[] key) {
final blobItem bi = keeperOf(key);
return bi != null;
@ -549,15 +561,28 @@ public class ArrayStack implements BLOB {
if (bi.blob.containsKey(key)) return bi;
return null;
}
// first check the current blob only because that has most probably the key if any has that key
int bs1 = this.blobs.size() - 1;
blobItem bi = this.blobs.get(bs1);
if (bi.blob.containsKey(key)) return bi;
if (this.blobs.size() == 2) {
// this should not be done concurrently
bi = this.blobs.get(0);
if (bi.blob.containsKey(key)) return bi;
return null;
}
// start a concurrent query to database tables
final CompletionService<blobItem> cs = new ExecutorCompletionService<blobItem>(this.executor);
int accepted = 0;
for (final blobItem bi : this.blobs) {
for (int i = 0; i < bs1; i++) {
final blobItem b = this.blobs.get(i);
try {
cs.submit(new Callable<blobItem>() {
@Override
public blobItem call() {
if (bi.blob.containsKey(key)) return bi;
if (b.blob.containsKey(key)) return b;
return null;
}
});
@ -565,7 +590,7 @@ public class ArrayStack implements BLOB {
} catch (final RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full
// execute the search direct here without concurrency
if (bi.blob.containsKey(key)) return bi;
if (b.blob.containsKey(key)) return b;
}
}
@ -599,6 +624,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
if (this.blobs.size() == 0) return null;
if (this.blobs.size() == 1) {
@ -619,6 +645,7 @@ public class ArrayStack implements BLOB {
*/
}
@Override
public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null;
try {
@ -652,6 +679,7 @@ public class ArrayStack implements BLOB {
this.key = key;
}
@Override
protected byte[] next0() {
while (this.bii.hasNext()) {
final BLOB b = this.bii.next().blob;
@ -677,6 +705,7 @@ public class ArrayStack implements BLOB {
* @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException
*/
@Override
public synchronized long length(final byte[] key) throws IOException {
long l;
for (final blobItem bi: this.blobs) {
@ -707,6 +736,7 @@ public class ArrayStack implements BLOB {
this.key = key;
}
@Override
protected Long next0() {
while (this.bii.hasNext()) {
final BLOB b = this.bii.next().blob;
@ -744,6 +774,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized void insert(final byte[] key, final byte[] b) throws IOException {
blobItem bi = (this.blobs.isEmpty()) ? null : this.blobs.get(this.blobs.size() - 1);
/*
@ -770,6 +801,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
int d = 0;
for (final blobItem bi: this.blobs) {
@ -784,6 +816,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized int reduce(final byte[] key, final Reducer reduce) throws IOException, RowSpaceExceededException {
int d = 0;
for (final blobItem bi: this.blobs) {
@ -797,6 +830,7 @@ public class ArrayStack implements BLOB {
* @param key the primary key
* @throws IOException
*/
@Override
public synchronized void delete(final byte[] key) throws IOException {
final long m = mem();
if (this.blobs.size() == 0) {
@ -812,6 +846,7 @@ public class ArrayStack implements BLOB {
// run this in a concurrent thread
final blobItem bi0 = bi;
t[i] = new Thread() {
@Override
public void run() {
try { bi0.blob.delete(key); } catch (final IOException e) {}
}
@ -831,6 +866,7 @@ public class ArrayStack implements BLOB {
/**
* close the BLOB
*/
@Override
public synchronized void close(final boolean writeIDX) {
for (final blobItem bi: this.blobs) bi.blob.close(writeIDX);
this.blobs.clear();

0
source/net/yacy/kelondro/blob/Heap.java Executable file → Normal file
View File

View File

@ -0,0 +1,540 @@
package net.yacy.kelondro.data.meta;
import net.yacy.cora.lod.Node;
import net.yacy.cora.lod.vocabulary.Rdf;
import net.yacy.kelondro.data.word.WordReferenceVars;
public class URIMetadataNode /*implements URIMetadata*/ {
private final Node entry;
private final String snippet;
private final WordReferenceVars word; // this is only used if the url is transported via remote search requests
private final long ranking; // during generation of a search result this value is set
/**
 * Creates a dummy entry that carries no snippet, no word reference and a
 * ranking of 0; useful for producing poison objects.
 */
public URIMetadataNode() {
    // no search context is attached to a dummy entry
    this.snippet = null;
    this.word = null;
    this.ranking = 0;
    // the backing node is a bare Rdf.Description node
    this.entry = new Node(Rdf.Description);
}
/*
public URIMetadataNode(
final DigestURI url,
final String dc_title,
final String dc_creator,
final String dc_subject,
final String dc_publisher,
final float lon, final float lat, // decimal degrees as in WGS84; if unknown both values may be 0.0f;
final Date mod,
final Date load,
final Date fresh,
final String referrer,
final byte[] md5,
final long size,
final int wc,
final char dt,
final Bitfield flags,
final byte[] lang,
final int llocal,
final int lother,
final int laudio,
final int limage,
final int lvideo,
final int lapp) {
// create new entry
this.entry = new Node();
this.entry.setSubject(UTF8.getBytes(url.toNormalform(true, false)));
this.entry.setObject(YaCyMetadata.hash, url.hash());
this.entry.setObject(DublinCore.Title, UTF8.getBytes(dc_title));
this.entry.setObject(DublinCore.Creator, UTF8.getBytes(dc_creator));
this.entry.setObject(DublinCore.Subject, UTF8.getBytes(dc_subject));
this.entry.setObject(DublinCore.Publisher, UTF8.getBytes(dc_publisher));
this.entry.setObject(Geo.Lat, ASCII.getBytes(Float.toString(lat)));
this.entry.setObject(Geo.Long, ASCII.getBytes(Float.toString(lon)));
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer));
this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, lang);
this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage);
this.entry.setCol(col_laudio, laudio);
this.entry.setCol(col_lvideo, lvideo);
this.entry.setCol(col_lapp, lapp);
//System.out.println("===DEBUG=== " + load.toString() + ", " + decodeDate(col_load).toString());
this.snippet = null;
this.word = null;
this.ranking = 0;
this.comp = null;
}
private byte[] encodeDate(final Date d) {
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
// 86400000 is the number of milliseconds in one day
return NaturalOrder.encodeLong(d.getTime() / 86400000L, 4);
}
private Date decodeDate(final int col) {
final long t = this.entry.getColLong(col);
}
public static byte[] encodeComp(
final DigestURI url,
final String dc_title,
final String dc_creator,
final String dc_subject,
final String dc_publisher,
final float lat,
final float lon) {
final CharBuffer s = new CharBuffer(360);
s.append(url.toNormalform(false, true)).appendLF();
s.append(dc_title).appendLF();
if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
s.appendLF();
if (dc_subject.length() > 120) s.append(dc_subject, 0, 120); else s.append(dc_subject);
s.appendLF();
if (dc_publisher.length() > 80) s.append(dc_publisher, 0, 80); else s.append(dc_publisher);
s.appendLF();
if (lon == 0.0f && lat == 0.0f) s.appendLF(); else s.append(Float.toString(lat)).append(',').append(Float.toString(lon)).appendLF();
return UTF8.getBytes(s.toString());
}
public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
this.ranking = ranking;
this.comp = null;
}
public URIMetadataRow(final Properties prop) {
// generates a plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
DigestURI url;
try {
url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), ASCII.getBytes(prop.getProperty("hash")));
} catch (final MalformedURLException e) {
url = null;
}
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash()); // FIXME potential null pointer access
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
// create new formatters to make concurrency possible
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
encodeDate(col_mod, formatter.parse(prop.getProperty("mod", "20000101")));
} catch (final ParseException e) {
encodeDate(col_mod, new Date());
}
try {
encodeDate(col_load, formatter.parse(prop.getProperty("load", "20000101")));
} catch (final ParseException e) {
encodeDate(col_load, new Date());
}
try {
encodeDate(col_fresh, formatter.parse(prop.getProperty("fresh", "20000101")));
} catch (final ParseException e) {
encodeDate(col_fresh, new Date());
}
this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", "")));
this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
final String dt = prop.getProperty("dt", "t");
this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
final String flags = prop.getProperty("flags", "AAAAAA");
this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk")));
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));
this.entry.setCol(col_laudio, Integer.parseInt(prop.getProperty("laudio", "0")));
this.entry.setCol(col_lvideo, Integer.parseInt(prop.getProperty("lvideo", "0")));
this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
this.word = null;
if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
}
this.ranking = 0;
this.comp = null;
}
public static URIMetadataRow importEntry(final String propStr) {
if (propStr == null || (propStr.length() > 0 && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
return null;
}
try {
return new URIMetadataRow(MapTools.s2p(propStr.substring(1, propStr.length() - 1)));
} catch (final kelondroException e) {
// wrong format
return null;
}
}
private StringBuilder corePropList() {
// generate a parseable string; this is a simple property-list
final Components metadata = metadata();
final StringBuilder s = new StringBuilder(300);
if (metadata == null) return null;
//System.out.println("author=" + comp.author());
// create new formatters to make concurrency possible
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
s.append("hash=").append(ASCII.String(hash()));
assert (s.toString().indexOf(0) < 0);
s.append(",url=").append(crypt.simpleEncode(metadata.url().toNormalform(false, true)));
assert (s.toString().indexOf(0) < 0);
s.append(",descr=").append(crypt.simpleEncode(metadata.dc_title()));
assert (s.toString().indexOf(0) < 0);
s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
assert (s.toString().indexOf(0) < 0);
s.append(",tags=").append(crypt.simpleEncode(metadata.dc_subject()));
assert (s.toString().indexOf(0) < 0);
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
assert (s.toString().indexOf(0) < 0);
s.append(",lat=").append(metadata.lat());
assert (s.toString().indexOf(0) < 0);
s.append(",lon=").append(metadata.lon());
assert (s.toString().indexOf(0) < 0);
s.append(",mod=").append(formatter.format(moddate()));
assert (s.toString().indexOf(0) < 0);
s.append(",load=").append(formatter.format(loaddate()));
assert (s.toString().indexOf(0) < 0);
s.append(",fresh=").append(formatter.format(freshdate()));
assert (s.toString().indexOf(0) < 0);
s.append(",referrer=").append(referrerHash() == null ? "" : ASCII.String(referrerHash()));
assert (s.toString().indexOf(0) < 0);
s.append(",md5=").append(md5());
assert (s.toString().indexOf(0) < 0);
s.append(",size=").append(size());
assert (s.toString().indexOf(0) < 0);
s.append(",wc=").append(wordCount());
assert (s.toString().indexOf(0) < 0);
s.append(",dt=").append(doctype());
assert (s.toString().indexOf(0) < 0);
s.append(",flags=").append(flags().exportB64());
assert (s.toString().indexOf(0) < 0);
s.append(",lang=").append(language() == null ? "EN" : UTF8.String(language()));
assert (s.toString().indexOf(0) < 0);
s.append(",llocal=").append(llocal());
assert (s.toString().indexOf(0) < 0);
s.append(",lother=").append(lother());
assert (s.toString().indexOf(0) < 0);
s.append(",limage=").append(limage());
assert (s.toString().indexOf(0) < 0);
s.append(",laudio=").append(laudio());
assert (s.toString().indexOf(0) < 0);
s.append(",lvideo=").append(lvideo());
assert (s.toString().indexOf(0) < 0);
s.append(",lapp=").append(lapp());
assert (s.toString().indexOf(0) < 0);
if (this.word != null) {
// append also word properties
final String wprop = this.word.toPropertyForm();
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
}
assert (s.toString().indexOf(0) < 0);
return s;
} catch (final Throwable e) {
// serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
// if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
// if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
Log.logException(e);
return null;
}
}
public Row.Entry toRowEntry() {
return this.entry;
}
public byte[] hash() {
// return a url-hash, based on the md5 algorithm
// the result is a String of 12 bytes within a 72-bit space
// (each byte has an 6-bit range)
// that should be enough for all web pages on the world
return this.entry.getPrimaryKeyBytes();
}
public long ranking() {
return this.ranking;
}
public boolean matches(final Pattern matcher) {
return this.metadata().matches(matcher);
}
public DigestURI url() {
return this.metadata().url();
}
public String dc_title() {
return this.metadata().dc_title();
}
public String dc_creator() {
return this.metadata().dc_creator();
}
public String dc_publisher() {
return this.metadata().dc_publisher();
}
public String dc_subject() {
return this.metadata().dc_subject();
}
public float lat() {
return this.metadata().lat();
}
public float lon() {
return this.metadata().lon();
}
private Components metadata() {
// avoid double computation of metadata elements
if (this.comp != null) return this.comp;
// parse elements from comp field;
final byte[] c = this.entry.getColBytes(col_comp, true);
final List<byte[]> cl = ByteBuffer.split(c, (byte) 10);
this.comp = new Components(
(cl.size() > 0) ? UTF8.String(cl.get(0)) : "",
hash(),
(cl.size() > 1) ? UTF8.String(cl.get(1)) : "",
(cl.size() > 2) ? UTF8.String(cl.get(2)) : "",
(cl.size() > 3) ? UTF8.String(cl.get(3)) : "",
(cl.size() > 4) ? UTF8.String(cl.get(4)) : "",
(cl.size() > 5) ? UTF8.String(cl.get(5)) : "");
return this.comp;
}
public Date moddate() {
return decodeDate(col_mod);
}
public Date loaddate() {
return decodeDate(col_load);
}
public Date freshdate() {
return decodeDate(col_fresh);
}
public byte[] referrerHash() {
// return the creator's hash or null if there is none
// FIXME: There seem to be some malformed entries in the databases like "null\0\0\0\0\0\0\0\0"
final byte[] r = this.entry.getColBytes(col_referrer, true);
if (r != null) {
int i = r.length;
while (i > 0) {
if (r[--i] == 0) return null;
}
}
return r;
}
public String md5() {
// returns the md5 in hex representation
return Digest.encodeHex(this.entry.getColBytes(col_md5, true));
}
public char doctype() {
return (char) this.entry.getColByte(col_dt);
}
public byte[] language() {
byte[] b = this.entry.getColBytes(col_lang, true);
if (b == null || b[0] == (byte)'[') {
String tld = this.metadata().url.getTLD();
if (tld.length() < 2 || tld.length() > 2) return ASCII.getBytes("en");
return ASCII.getBytes(tld);
}
return b;
}
public int size() {
return (int) this.entry.getColLong(col_size);
}
public Bitfield flags() {
return new Bitfield(this.entry.getColBytes(col_flags, true));
}
public int wordCount() {
return (int) this.entry.getColLong(col_wc);
}
public int llocal() {
return (int) this.entry.getColLong(col_llocal);
}
public int lother() {
return (int) this.entry.getColLong(col_lother);
}
public int limage() {
return (int) this.entry.getColLong(col_limage);
}
public int laudio() {
return (int) this.entry.getColLong(col_laudio);
}
public int lvideo() {
return (int) this.entry.getColLong(col_lvideo);
}
public int lapp() {
return (int) this.entry.getColLong(col_lapp);
}
public String snippet() {
// the snippet may appear here if the url was transported in a remote search
// it will not be saved anywhere, but can only be requested here
return this.snippet;
}
public WordReferenceVars word() {
return this.word;
}
public boolean isOlder(final URIMetadata other) {
if (other == null) return false;
final Date tmoddate = moddate();
final Date omoddate = other.moddate();
if (tmoddate.before(omoddate)) return true;
if (tmoddate.equals(omoddate)) {
final Date tloaddate = loaddate();
final Date oloaddate = other.loaddate();
if (tloaddate.before(oloaddate)) return true;
if (tloaddate.equals(oloaddate)) return true;
}
return false;
}
public String toString(final String snippet) {
// add information needed for remote transport
final StringBuilder core = corePropList();
if (core == null)
return null;
core.ensureCapacity(core.length() + snippet.length() * 2);
core.insert(0, "{");
core.append(",snippet=").append(crypt.simpleEncode(snippet));
core.append("}");
return core.toString();
//return "{" + core + ",snippet=" + crypt.simpleEncode(snippet) + "}";
}
public Request toBalancerEntry(final String initiatorHash) {
return new Request(
ASCII.getBytes(initiatorHash),
metadata().url(),
referrerHash(),
metadata().dc_title(),
moddate(),
null,
0,
0,
0,
0);
}
@Override
public String toString() {
final StringBuilder core = corePropList();
if (core == null) return null;
core.insert(0, "{");
core.append("}");
return core.toString();
//return "{" + core + "}";
}
private class Components {
private DigestURI url;
private String urlRaw;
private byte[] urlHash;
private final String dc_title, dc_creator, dc_subject, dc_publisher;
private final String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees
public Components(
final String urlRaw,
final byte[] urlhash,
final String title,
final String author,
final String tags,
final String publisher,
final String latlon) {
this.url = null;
this.urlRaw = urlRaw;
this.urlHash = urlhash;
this.dc_title = title;
this.dc_creator = author;
this.dc_subject = tags;
this.dc_publisher = publisher;
this.latlon = latlon;
}
public boolean matches(final Pattern matcher) {
if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
return false;
}
public DigestURI url() {
if (this.url == null) {
try {
this.url = new DigestURI(this.urlRaw, this.urlHash);
} catch (final MalformedURLException e) {
this.url = null;
}
this.urlRaw = null;
this.urlHash = null;
}
return this.url;
}
public String dc_title() { return this.dc_title; }
public String dc_creator() { return this.dc_creator; }
public String dc_publisher() { return this.dc_publisher; }
public String dc_subject() { return this.dc_subject; }
public float lat() {
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
final int p = this.latlon.indexOf(',');
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(0, p));
}
public float lon() {
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
final int p = this.latlon.indexOf(',');
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(p + 1));
}
}
*/
}

View File

@ -157,7 +157,16 @@ public final class Records {
* @throws IOException
*/
private final long filesize() throws IOException {
// The result is the file length divided by the record size (integer division).
// NOTE(review): the direct return on the next line and the guarded computation
// after it are both present in this listing; with the direct return in place the
// guarded part is never reached. Presumably these are the old and new side of the
// change shown together - confirm which one is current.
return raf.length() / recordsize;
long records = 0;
try {
records = raf.length() / recordsize;
} catch (NullPointerException e) {
// This may happen on shutdown while still something is moving on
// In that case the exception is logged and 0 is returned.
Log.logException(e);
}
return records;
}
/**

0
source/net/yacy/kelondro/table/Relations.java Executable file → Normal file
View File

0
source/net/yacy/kelondro/util/ISO639.java Executable file → Normal file
View File

View File

@ -61,16 +61,18 @@ import net.yacy.peers.operation.yacySeedUploadFtp;
import net.yacy.peers.operation.yacySeedUploadScp;
import net.yacy.peers.operation.yacySeedUploader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.server.serverCore;
public class Network {
public class Network
{
// statics
public static final ThreadGroup publishThreadGroup = new ThreadGroup("publishThreadGroup");
public static final HashMap<String, String> seedUploadMethods = new HashMap<String, String>();
public static final Log log = new Log("YACY");
public static long lastOnlineTime = 0;
/** pseudo-random key derived from a time-interval while YaCy startup*/
/** pseudo-random key derived from a time-interval while YaCy startup */
public static long speedKey = 0;
public static long magic = System.currentTimeMillis();
public static final Map<String, Accessible> amIAccessibleDB = new ConcurrentHashMap<String, Accessible>(); // Holds PeerHash / yacyAccessible Relations
@ -105,10 +107,10 @@ public class Network {
// ensure that correct IP is used
final String staticIP = sb.getConfig("staticIP", "");
if (staticIP.length() != 0 && Seed.isProperIP(staticIP) == null) {
if ( staticIP.length() != 0 && Seed.isProperIP(staticIP) == null ) {
serverCore.useStaticIP = true;
sb.peers.mySeed().setIP(staticIP);
log.logInfo("staticIP set to "+ staticIP);
log.logInfo("staticIP set to " + staticIP);
} else {
serverCore.useStaticIP = false;
}
@ -127,7 +129,9 @@ public class Network {
}
public final void publishSeedList() {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
if ( log.isFine() ) {
log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
}
/*
if (oldIPStamp.equals((String) seedDB.mySeed.get(yacySeed.IP, "127.0.0.1")))
@ -138,45 +142,48 @@ public class Network {
yacyCore.log.logDebug("***DEBUG publishSeedList: I can reach myself");
*/
if ((this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP())) &&
(this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected()) &&
(canReachMyself()) &&
(System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24) &&
(this.sb.peers.mySeed().isPrincipal())
) {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
if ( (this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP()))
&& (this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected())
&& (canReachMyself())
&& (System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24)
&& (this.sb.peers.mySeed().isPrincipal()) ) {
if ( log.isFine() ) {
log
.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
}
return;
}
// getting the seed upload method that should be used ...
final String seedUploadMethod = this.sb.getConfig("seedUploadMethod", "");
if (
(!seedUploadMethod.equalsIgnoreCase("none")) ||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0)) ||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0))
) {
if (seedUploadMethod.equals("")) {
if (this.sb.getConfig("seedFTPPassword", "").length() > 0) {
if ( (!seedUploadMethod.equalsIgnoreCase("none"))
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0))
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0)) ) {
if ( seedUploadMethod.equals("") ) {
if ( this.sb.getConfig("seedFTPPassword", "").length() > 0 ) {
this.sb.setConfig("seedUploadMethod", "Ftp");
}
if (this.sb.getConfig("seedFilePath", "").length() > 0) {
if ( this.sb.getConfig("seedFilePath", "").length() > 0 ) {
this.sb.setConfig("seedUploadMethod", "File");
}
}
// we want to be a principal...
saveSeedList(this.sb);
} else {
if (seedUploadMethod.equals("")) {
if ( seedUploadMethod.equals("") ) {
this.sb.setConfig("seedUploadMethod", "none");
}
if (log.isFine()) log.logFine("yacyCore.publishSeedList: No uploading method configured");
if ( log.isFine() ) {
log.logFine("yacyCore.publishSeedList: No uploading method configured");
}
return;
}
}
public final void peerPing() {
if ((this.sb.isRobinsonMode()) && (this.sb.getConfig("cluster.mode", "").equals("privatepeer"))) {
if ( (this.sb.isRobinsonMode())
&& (this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER)) ) {
// in case this peer is a private peer we omit the peer ping
// all other robinson peer types do a peer ping:
// the privatecluster does the ping to the other cluster members
@ -189,42 +196,54 @@ public class Network {
this.sb.updateMySeed();
// publish own seed to other peers; every peer can do this, but it only makes sense for senior peers
if (this.sb.peers.sizeConnected() == 0) {
if ( this.sb.peers.sizeConnected() == 0 ) {
// reload the seed lists
this.sb.loadSeedLists();
log.logInfo("re-initialized seed list. received " + this.sb.peers.sizeConnected() + " new peer(s)");
log.logInfo("re-initialized seed list. received "
+ this.sb.peers.sizeConnected()
+ " new peer(s)");
}
final int newSeeds = publishMySeed(false);
if (newSeeds > 0) {
log.logInfo("received " + newSeeds + " new peer(s), know a total of " + this.sb.peers.sizeConnected() + " different peers");
if ( newSeeds > 0 ) {
log.logInfo("received "
+ newSeeds
+ " new peer(s), know a total of "
+ this.sb.peers.sizeConnected()
+ " different peers");
}
}
/**
 * Checks whether this peer can be reached under its own known peer address.
 * <p>
 * The own seed is queried through {@code Protocol.queryUrlCount}. If
 * {@code callback[0]} is non-negative and {@code callback[1]} equals
 * {@code magic}, the own seed's last-seen time is refreshed and the method
 * returns {@code true}. Otherwise a forced {@code publishMySeed(true)} is
 * issued and the result reports whether the public address is still the same
 * non-null value it was before that call.
 *
 * @return {@code true} if the self-query succeeded, or if the public address
 *         was non-null and unchanged after the forced re-publish;
 *         {@code false} otherwise
 */
private boolean canReachMyself() { // TODO: check if this method is necessary - depending on the used router it will not work
// returns true if we can reach ourself under our known peer address
// if we cannot reach ourself, we call a forced publishMySeed and return whether
// our public address is still the same (non-null) address as before that call
final long[] callback = Protocol.queryUrlCount(this.sb.peers.mySeed());
if (callback[0] >= 0 && callback[1] == magic) {
final long[] callback = Protocol.queryUrlCount(this.sb.peers.mySeed());
if ( callback[0] >= 0 && callback[1] == magic ) {
// the self-query came back with our own magic value: we are reachable
this.sb.peers.mySeed().setLastSeenUTC();
return true;
}
log.logInfo("re-connect own seed");
// remember the address before re-publishing so it can be compared afterwards
final String oldAddress = this.sb.peers.mySeed().getPublicAddress();
/*final int newSeeds =*/ publishMySeed(true);
/*final int newSeeds =*/publishMySeed(true);
return (oldAddress != null && oldAddress.equals(this.sb.peers.mySeed().getPublicAddress()));
}
// use our own formatter to prevent concurrency locks with other processes
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second);
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(
GenericFormatter.FORMAT_SHORT_SECOND,
GenericFormatter.time_second);
protected class publishThread extends Thread {
protected class publishThread extends Thread
{
int added;
private final Seed seed;
private final Semaphore sync;
private final List<Thread> syncList;
public publishThread(final ThreadGroup tg, final Seed seed,
final Semaphore sync, final List<Thread> syncList) throws InterruptedException {
public publishThread(
final ThreadGroup tg,
final Seed seed,
final Semaphore sync,
final List<Thread> syncList) throws InterruptedException {
super(tg, "PublishSeed_" + seed.getName());
this.sync = sync;
@ -235,50 +254,106 @@ public class Network {
this.added = 0;
}
@Override
public final void run() {
try {
this.added = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed.getClusterAddress(), this.seed.hash, this.seed.getName());
if (this.added < 0) {
this.added =
Protocol.hello(
Network.this.sb.peers.mySeed(),
Network.this.sb.peers.peerActions,
this.seed.getClusterAddress(),
this.seed.hash,
this.seed.getName());
if ( this.added < 0 ) {
// no or wrong response, delete that address
final String cause = "peer ping to peer resulted in error response (added < 0)";
log.logInfo("publish: disconnected " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' from " + this.seed.getPublicAddress() + ": " + cause);
log.logInfo("publish: disconnected "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' from "
+ this.seed.getPublicAddress()
+ ": "
+ cause);
Network.this.sb.peers.peerActions.peerDeparture(this.seed, cause);
} else {
// success! we have published our peer to a senior peer
// update latest news from the other peer
log.logInfo("publish: handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress());
log.logInfo("publish: handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress());
// check if seed's lastSeen has been updated
final Seed newSeed = Network.this.sb.peers.getConnected(this.seed.hash);
if (newSeed != null) {
if (!newSeed.isOnline()) {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " is not online." +
" Removing Peer from connected");
if ( newSeed != null ) {
if ( !newSeed.isOnline() ) {
if ( log.isFine() ) {
log.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " is not online."
+ " Removing Peer from connected");
}
Network.this.sb.peers.peerActions.peerDeparture(newSeed, "peer not online");
} else
if (newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000)) {
} else if ( newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000) ) {
// update last seed date
if (newSeed.getLastSeenUTC() >= this.seed.getLastSeenUTC()) {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "'");
if ( newSeed.getLastSeenUTC() >= this.seed.getLastSeenUTC() ) {
if ( log.isFine() ) {
log
.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " with old LastSeen: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
.getLastSeenUTC())) + "'");
}
newSeed.setLastSeenUTC();
Network.this.sb.peers.peerActions.peerArrival(newSeed, true);
} else {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "', this is more recent: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(this.seed.getLastSeenUTC())) + "'");
if ( log.isFine() ) {
log
.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " with old LastSeen: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
.getLastSeenUTC()))
+ "', this is more recent: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(this.seed
.getLastSeenUTC()))
+ "'");
}
this.seed.setLastSeenUTC();
Network.this.sb.peers.peerActions.peerArrival(this.seed, true);
}
}
} else {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " not in connectedDB");
if ( log.isFine() ) {
log.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " not in connectedDB");
}
}
}
} catch (final Exception e) {
log.logSevere("publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(), e);
} catch ( final Exception e ) {
log.logSevere(
"publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(),
e);
} finally {
this.syncList.add(this);
this.sync.release();
@ -310,41 +385,54 @@ public class Network {
int attempts = this.sb.peers.sizeConnected();
// getting a list of peers to contact
if (this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN).equals(Seed.PEERTYPE_VIRGIN)) {
if (attempts > PING_INITIAL) { attempts = PING_INITIAL; }
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN).equals(Seed.PEERTYPE_VIRGIN) ) {
if ( attempts > PING_INITIAL ) {
attempts = PING_INITIAL;
}
final Map<byte[], String> ch = Switchboard.getSwitchboard().clusterhashes;
seeds = PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
seeds =
PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
// add also all peers from cluster if this is a public robinson cluster
if (ch != null) {
if ( ch != null ) {
final Iterator<Map.Entry<byte[], String>> i = ch.entrySet().iterator();
String hash;
Map.Entry<byte[], String> entry;
Seed seed;
while (i.hasNext()) {
while ( i.hasNext() ) {
entry = i.next();
hash = ASCII.String(entry.getKey());
seed = seeds.get(hash);
if (seed == null) {
if ( seed == null ) {
seed = this.sb.peers.get(hash);
if (seed == null) continue;
if ( seed == null ) {
continue;
}
}
seed.setAlternativeAddress(entry.getValue());
seeds.put(hash, seed);
}
}
}
} else {
int diff = PING_MIN_DBSIZE - amIAccessibleDB.size();
if (diff > PING_MIN_RUNNING) {
if ( diff > PING_MIN_RUNNING ) {
diff = Math.min(diff, PING_MAX_RUNNING);
if (attempts > diff) { attempts = diff; }
if ( attempts > diff ) {
attempts = diff;
}
} else {
if (attempts > PING_MIN_RUNNING) { attempts = PING_MIN_RUNNING; }
if ( attempts > PING_MIN_RUNNING ) {
attempts = PING_MIN_RUNNING;
}
}
seeds = PeerSelection.seedsByAge(this.sb.peers, false, attempts); // best for seed list maintenance/cleaning
}
if (seeds == null || seeds.isEmpty()) { return 0; }
if (seeds.size() < attempts) { attempts = seeds.size(); }
if ( seeds == null || seeds.isEmpty() ) {
return 0;
}
if ( seeds.size() < attempts ) {
attempts = seeds.size();
}
// This will try to get Peers that are not currently in amIAccessibleDB
final Iterator<Seed> si = seeds.values().iterator();
@ -353,12 +441,12 @@ public class Network {
// include a YaCyNews record to my seed
try {
final NewsDB.Record record = this.sb.peers.newsPool.myPublication();
if (record == null) {
if ( record == null ) {
this.sb.peers.mySeed().put("news", "");
} else {
this.sb.peers.mySeed().put("news", de.anomic.tools.crypt.simpleEncode(record.toString()));
}
} catch (final Exception e) {
} catch ( final Exception e ) {
log.logSevere("publishMySeed: problem with news encoding", e);
}
this.sb.peers.mySeed().setUnusedFlags();
@ -371,20 +459,25 @@ public class Network {
// going through the peer list and starting a new publisher thread for each peer
int i = 0;
while (si.hasNext()) {
while ( si.hasNext() ) {
seed = si.next();
if (seed == null) {
if ( seed == null ) {
sync.acquire();
continue;
}
i++;
final String address = seed.getClusterAddress();
if (log.isFine()) log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
if ( log.isFine() ) {
log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
}
final String seederror = seed.isProper(false);
if ((address == null) || (seederror != null)) {
if ( (address == null) || (seederror != null) ) {
// we don't like that address, delete it
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror);
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = "
+ address
+ "; seederror = "
+ seederror);
sync.acquire();
} else {
// starting a new publisher thread
@ -394,13 +487,13 @@ public class Network {
}
// receiving the result of all started publisher threads
for (int j = 0; j < contactedSeedCount; j++) {
for ( int j = 0; j < contactedSeedCount; j++ ) {
// waiting for the next thread to finish
sync.acquire();
// if this is true something is wrong ...
if (syncList.isEmpty()) {
if ( syncList.isEmpty() ) {
log.logWarning("PeerPing: syncList.isEmpty()==true");
continue;
//return 0;
@ -410,9 +503,9 @@ public class Network {
final publishThread t = (publishThread) syncList.remove(0);
// getting the amount of new reported seeds
if (t.added >= 0) {
if (newSeeds == -1) {
newSeeds = t.added;
if ( t.added >= 0 ) {
if ( newSeeds == -1 ) {
newSeeds = t.added;
} else {
newSeeds += t.added;
}
@ -423,33 +516,41 @@ public class Network {
int notaccessible = 0;
final long cutofftime = System.currentTimeMillis() - PING_MAX_DBAGE;
final int dbSize;
synchronized (amIAccessibleDB) {
synchronized ( amIAccessibleDB ) {
dbSize = amIAccessibleDB.size();
final Iterator<String> ai = amIAccessibleDB.keySet().iterator();
while (ai.hasNext()) {
while ( ai.hasNext() ) {
final Accessible ya = amIAccessibleDB.get(ai.next());
if (ya.lastUpdated < cutofftime) {
if ( ya.lastUpdated < cutofftime ) {
ai.remove();
} else {
if (ya.IWasAccessed) {
if ( ya.IWasAccessed ) {
accessible++;
} else {
notaccessible++;
}
}
}
if (log.isFine()) log.logFine("DBSize before -> after Cleanup: " + dbSize + " -> " + amIAccessibleDB.size());
if ( log.isFine() ) {
log
.logFine("DBSize before -> after Cleanup: "
+ dbSize
+ " -> "
+ amIAccessibleDB.size());
}
}
log.logInfo("PeerPing: I am accessible for " + accessible +
" peer(s), not accessible for " + notaccessible + " peer(s).");
log.logInfo("PeerPing: I am accessible for "
+ accessible
+ " peer(s), not accessible for "
+ notaccessible
+ " peer(s).");
if ((accessible + notaccessible) > 0) {
if ( (accessible + notaccessible) > 0 ) {
final String newPeerType;
// At least one other Peer told us our type
if ((accessible >= PING_MIN_PEERSEEN) ||
(accessible >= notaccessible)) {
if ( (accessible >= PING_MIN_PEERSEEN) || (accessible >= notaccessible) ) {
// We can be reached from a majority of other Peers
if (this.sb.peers.mySeed().isPrincipal()) {
if ( this.sb.peers.mySeed().isPrincipal() ) {
newPeerType = Seed.PEERTYPE_PRINCIPAL;
} else {
newPeerType = Seed.PEERTYPE_SENIOR;
@ -458,10 +559,14 @@ public class Network {
// We cannot be reached from the outside
newPeerType = Seed.PEERTYPE_JUNIOR;
}
if (this.sb.peers.mySeed().orVirgin().equals(newPeerType)) {
if ( this.sb.peers.mySeed().orVirgin().equals(newPeerType) ) {
log.logInfo("PeerPing: myType is " + this.sb.peers.mySeed().orVirgin());
} else {
log.logInfo("PeerPing: changing myType from '" + this.sb.peers.mySeed().orVirgin() + "' to '" + newPeerType + "'");
log.logInfo("PeerPing: changing myType from '"
+ this.sb.peers.mySeed().orVirgin()
+ "' to '"
+ newPeerType
+ "'");
this.sb.peers.mySeed().put(Seed.PEERTYPE, newPeerType);
}
} else {
@ -474,24 +579,33 @@ public class Network {
this.sb.peers.saveMySeed();
// if we have an address, we do nothing
if (this.sb.peers.mySeed().isProper(true) == null && !force) { return 0; }
if (newSeeds > 0) return newSeeds;
if ( this.sb.peers.mySeed().isProper(true) == null && !force ) {
return 0;
}
if ( newSeeds > 0 ) {
return newSeeds;
}
// still no success: ask own NAT or internet responder
//final boolean DI604use = switchboard.getConfig("DI604use", "false").equals("true");
//final String DI604pw = switchboard.getConfig("DI604pw", "");
final String ip = this.sb.getConfig("staticIP", "");
final String ip = this.sb.getConfig("staticIP", "");
//if (ip.equals("")) ip = natLib.retrieveIP(DI604use, DI604pw);
// yacyCore.log.logDebug("DEBUG: new IP=" + ip);
if (Seed.isProperIP(ip) == null) this.sb.peers.mySeed().setIP(ip);
if (this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR)) // ???????????????
if ( Seed.isProperIP(ip) == null ) {
this.sb.peers.mySeed().setIP(ip);
}
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR) ) {
this.sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); // to start bootstraping, we need to be recognised as PEERTYPE_SENIOR peer
log.logInfo("publish: no recipient found, our address is " +
((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers.mySeed().getPublicAddress()));
}
log.logInfo("publish: no recipient found, our address is "
+ ((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers
.mySeed()
.getPublicAddress()));
this.sb.peers.saveMySeed();
return 0;
} catch (final InterruptedException e) {
} catch ( final InterruptedException e ) {
try {
log.logInfo("publish: Interruption detected while publishing my seed.");
@ -499,32 +613,50 @@ public class Network {
Thread.interrupted();
// interrupt all already started publishThreads
log.logInfo("publish: Signaling shutdown to " + Network.publishThreadGroup.activeCount() + " remaining publishing threads ...");
log.logInfo("publish: Signaling shutdown to "
+ Network.publishThreadGroup.activeCount()
+ " remaining publishing threads ...");
Network.publishThreadGroup.interrupt();
// waiting some time for the publishThreads to finish execution
try { Thread.sleep(500); } catch (final InterruptedException ex) {}
try {
Thread.sleep(500);
} catch ( final InterruptedException ex ) {
}
// getting the amount of remaining publishing threads
int threadCount = Network.publishThreadGroup.activeCount();
int threadCount = Network.publishThreadGroup.activeCount();
final Thread[] threadList = new Thread[threadCount];
threadCount = Network.publishThreadGroup.enumerate(threadList);
// we need to use a timeout here because of missing interruptable session threads ...
if (log.isFine()) log.logFine("publish: Waiting for " + Network.publishThreadGroup.activeCount() + " remaining publishing threads to finish shutdown ...");
for (int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++) {
if ( log.isFine() ) {
log.logFine("publish: Waiting for "
+ Network.publishThreadGroup.activeCount()
+ " remaining publishing threads to finish shutdown ...");
}
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
final Thread currentThread = threadList[currentThreadIdx];
if (currentThread.isAlive()) {
if (log.isFine()) log.logFine("publish: Waiting for remaining publishing thread '" + currentThread.getName() + "' to finish shutdown");
try { currentThread.join(500); } catch (final InterruptedException ex) {}
if ( currentThread.isAlive() ) {
if ( log.isFine() ) {
log.logFine("publish: Waiting for remaining publishing thread '"
+ currentThread.getName()
+ "' to finish shutdown");
}
try {
currentThread.join(500);
} catch ( final InterruptedException ex ) {
}
}
}
log.logInfo("publish: Shutdown off all remaining publishing thread finished.");
} catch (final Exception ee) {
log.logWarning("publish: Unexpected error while trying to shutdown all remaining publishing threads.", e);
} catch ( final Exception ee ) {
log.logWarning(
"publish: Unexpected error while trying to shutdown all remaining publishing threads.",
e);
}
return 0;
@ -533,25 +665,27 @@ public class Network {
/**
 * Returns a copy of the registry of seed upload methods.
 * <p>
 * The copy is produced with {@code clone()} while holding the monitor of
 * {@code Network.seedUploadMethods}, so the caller receives its own map
 * object rather than the shared one.
 *
 * @return a clone of {@code Network.seedUploadMethods}
 */
@SuppressWarnings("unchecked")
public static HashMap<String, String> getSeedUploadMethods() {
synchronized (Network.seedUploadMethods) {
synchronized ( Network.seedUploadMethods ) {
// clone() returns Object, hence the unchecked cast suppressed above
return (HashMap<String, String>) Network.seedUploadMethods.clone();
}
}
/**
 * Creates the seed uploader that is registered under the given method name.
 * <p>
 * The class name is looked up in {@code Network.seedUploadMethods} while
 * holding that map's monitor; the class is then loaded with
 * {@code Class.forName} and instantiated through its no-argument
 * constructor after the synchronized block has been left.
 *
 * @param methodname key of the upload method in {@code Network.seedUploadMethods}
 * @return a new uploader instance, or {@code null} if no class name is
 *         registered for {@code methodname} or if loading, instantiating or
 *         casting the class throws any {@code Exception}
 */
public static yacySeedUploader getSeedUploader(final String methodname) {
String className = null;
synchronized (Network.seedUploadMethods) {
if (Network.seedUploadMethods.containsKey(methodname)) {
synchronized ( Network.seedUploadMethods ) {
if ( Network.seedUploadMethods.containsKey(methodname) ) {
className = Network.seedUploadMethods.get(methodname);
}
}
// unknown method name: nothing to instantiate
if (className == null) { return null; }
if ( className == null ) {
return null;
}
try {
final Class<?> uploaderClass = Class.forName(className);
final Object uploader = uploaderClass.newInstance();
return (yacySeedUploader) uploader;
} catch (final Exception e) {
} catch ( final Exception e ) {
// every failure (class not found, not instantiable, wrong type) is reported as "no uploader"
return null;
}
}
@ -559,19 +693,32 @@ public class Network {
/**
 * Registers the built-in seed uploaders in {@code Network.seedUploadMethods}.
 * <p>
 * For each of {@code yacySeedUploadFile}, {@code yacySeedUploadFtp} and
 * {@code yacySeedUploadScp} an instance is created only to derive the map
 * entry: the key is the simple class name with the leading
 * {@code "yacySeedUpload"} removed, the value is the canonical class name.
 * <p>
 * NOTE(review): the {@code put} calls here are not made under the
 * {@code Network.seedUploadMethods} monitor that the reading methods use;
 * presumably this runs once during start-up - confirm against the caller.
 */
public static void loadSeedUploadMethods() {
yacySeedUploader uploader;
uploader = new yacySeedUploadFile();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
uploader = new yacySeedUploadFtp();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
uploader = new yacySeedUploadScp();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
}
/**
 * Tells whether the given seed upload method name is acceptable.
 * <p>
 * As far as the code shown here goes, this method only validates the name;
 * it does not write any configuration itself.
 *
 * @param method the upload method name to check
 * @return {@code false} for {@code null} or an empty string, {@code true}
 *         for {@code "none"} (compared case-insensitively), otherwise whether
 *         {@code Network.seedUploadMethods} contains {@code method} as a key
 */
public static boolean changeSeedUploadMethod(final String method) {
if (method == null || method.length() == 0) { return false; }
if ( method == null || method.length() == 0 ) {
return false;
}

// "none" is always accepted although it is not a key of the registry
if (method.equalsIgnoreCase("none")) { return true; }
if ( method.equalsIgnoreCase("none") ) {
return true;
}

// the registry lookup is an exact key match (no case folding)
synchronized (Network.seedUploadMethods) {
synchronized ( Network.seedUploadMethods ) {
return Network.seedUploadMethods.containsKey(method);
}
}
@ -582,7 +729,7 @@ public class Network {
String logt;
// be sure that we have something to say
if (sb.peers.mySeed().getPublicAddress() == null) {
if ( sb.peers.mySeed().getPublicAddress() == null ) {
final String errorMsg = "We have no valid IP address until now";
log.logWarning("SaveSeedList: " + errorMsg);
return errorMsg;
@ -592,27 +739,30 @@ public class Network {
String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
// for backward compatibility ....
if (seedUploadMethod.equalsIgnoreCase("Ftp") ||
(seedUploadMethod.equals("") &&
sb.getConfig("seedFTPPassword", "").length() > 0)) {
if ( seedUploadMethod.equalsIgnoreCase("Ftp")
|| (seedUploadMethod.equals("") && sb.getConfig("seedFTPPassword", "").length() > 0) ) {
seedUploadMethod = "Ftp";
sb.setConfig("seedUploadMethod", seedUploadMethod);
} else if (seedUploadMethod.equalsIgnoreCase("File") ||
(seedUploadMethod.equals("") &&
sb.getConfig("seedFilePath", "").length() > 0)) {
} else if ( seedUploadMethod.equalsIgnoreCase("File")
|| (seedUploadMethod.equals("") && sb.getConfig("seedFilePath", "").length() > 0) ) {
seedUploadMethod = "File";
sb.setConfig("seedUploadMethod", seedUploadMethod);
}
// determine the seed uploader that should be used ...
if (seedUploadMethod.equalsIgnoreCase("none")) { return "no uploader specified"; }
if ( seedUploadMethod.equalsIgnoreCase("none") ) {
return "no uploader specified";
}
final yacySeedUploader uploader = getSeedUploader(seedUploadMethod);
if (uploader == null) {
final String errorMsg = "Unable to get the proper uploader-class for seed uploading method '" + seedUploadMethod + "'.";
if ( uploader == null ) {
final String errorMsg =
"Unable to get the proper uploader-class for seed uploading method '"
+ seedUploadMethod
+ "'.";
log.logWarning("SaveSeedList: " + errorMsg);
return errorMsg;
}
@ -621,35 +771,51 @@ public class Network {
DigestURI seedURL;
try {
final String seedURLStr = sb.peers.mySeed().get(Seed.SEEDLISTURL, "");
if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); }
if (!(
seedURLStr.toLowerCase().startsWith("http://") ||
seedURLStr.toLowerCase().startsWith("https://")
)) {
if ( seedURLStr.length() == 0 ) {
throw new MalformedURLException("The seed-file url must not be empty.");
}
if ( !(seedURLStr.toLowerCase().startsWith("http://") || seedURLStr.toLowerCase().startsWith(
"https://")) ) {
throw new MalformedURLException("Unsupported protocol.");
}
seedURL = new DigestURI(seedURLStr);
} catch (final MalformedURLException e) {
final String errorMsg = "Malformed seed file URL '" + sb.peers.mySeed().get(Seed.SEEDLISTURL, "") + "'. " + e.getMessage();
} catch ( final MalformedURLException e ) {
final String errorMsg =
"Malformed seed file URL '"
+ sb.peers.mySeed().get(Seed.SEEDLISTURL, "")
+ "'. "
+ e.getMessage();
log.logWarning("SaveSeedList: " + errorMsg);
return errorMsg;
}
// upload the seed-list using the configured uploader class
String prevStatus = sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
if (prevStatus.equals(Seed.PEERTYPE_PRINCIPAL)) { prevStatus = Seed.PEERTYPE_SENIOR; }
if ( prevStatus.equals(Seed.PEERTYPE_PRINCIPAL) ) {
prevStatus = Seed.PEERTYPE_SENIOR;
}
try {
sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); // this information shall also be uploaded
if (log.isFine()) log.logFine("SaveSeedList: Using seed uploading method '" + seedUploadMethod + "' for seed-list uploading." +
"\n\tPrevious peerType is '" + sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR) + "'.");
if ( log.isFine() ) {
log.logFine("SaveSeedList: Using seed uploading method '"
+ seedUploadMethod
+ "' for seed-list uploading."
+ "\n\tPrevious peerType is '"
+ sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR)
+ "'.");
}
logt = sb.peers.uploadSeedList(uploader, sb, sb.peers, seedURL);
if (logt != null) {
if (logt.indexOf("Error",0) >= 0) {
if ( logt != null ) {
if ( logt.indexOf("Error", 0) >= 0 ) {
sb.peers.mySeed().put(Seed.PEERTYPE, prevStatus);
final String errorMsg = "SaveSeedList: seed upload failed using " + uploader.getClass().getName() + " (error): " + logt.substring(logt.indexOf("Error",0) + 6);
final String errorMsg =
"SaveSeedList: seed upload failed using "
+ uploader.getClass().getName()
+ " (error): "
+ logt.substring(logt.indexOf("Error", 0) + 6);
log.logSevere(errorMsg);
return errorMsg;
}
@ -659,7 +825,7 @@ public class Network {
// finally, set the principal status
sb.setConfig("yacyStatus", Seed.PEERTYPE_PRINCIPAL);
return null;
} catch (final Exception e) {
} catch ( final Exception e ) {
sb.peers.mySeed().put(Seed.PEERTYPE, prevStatus);
sb.setConfig("yacyStatus", prevStatus);
final String errorMsg = "SaveSeedList: Seed upload failed (IO error): " + e.getMessage();

File diff suppressed because it is too large Load Diff

0
source/net/yacy/peers/dht/Dispatcher.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/FlatWordPartitionScheme.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/PartitionScheme.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/PeerSelection.java Executable file → Normal file
View File

View File

View File

@ -29,13 +29,14 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@ -91,22 +92,22 @@ public class Blacklist {
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null;
private final Map<String, HandleSet> cachedUrlHashs;
private final Map<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final Map<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
/**
 * Creates a blacklist that is rooted at the given directory.
 * <p>
 * After handing {@code rootPath} to {@code setRootPath}, the three top-level
 * maps are allocated and, for every entry of {@code BLACKLIST_TYPES}, an
 * empty host-to-paths map is put into {@code hostpaths_matchable} and
 * {@code hostpaths_notmatchable} and an empty {@code HandleSet} into
 * {@code cachedUrlHashs}.
 * <p>
 * NOTE(review): each map allocation appears twice in this listing, once as
 * {@code HashMap} and once as {@code ConcurrentHashMap}; these look like the
 * before/after lines of a switch to concurrent maps - confirm against the
 * actual file.
 *
 * @param rootPath directory passed on to {@code setRootPath}
 */
public Blacklist(final File rootPath) {
setRootPath(rootPath);
// prepare the data structure
this.hostpaths_matchable = new HashMap<String, Map<String, List<String>>>();
this.hostpaths_notmatchable = new HashMap<String, Map<String, List<String>>>();
this.cachedUrlHashs = new HashMap<String, HandleSet>();
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
// one empty entry per blacklist type, so that lookups by type find a container
for (final String blacklistType : BLACKLIST_TYPES) {
this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}

View File

@ -64,6 +64,7 @@ import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
import de.anomic.crawler.retrieval.SMBLoader;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.http.client.Cache;
public final class LoaderDispatcher {
@ -137,7 +138,7 @@ public final class LoaderDispatcher {
public void load(final DigestURI url, final CacheStrategy cacheStratgy, final int maxFileSize, final File targetFile) throws IOException {
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, false).getContent();
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, true).getContent();
if (b == null) throw new IOException("load == null");
final File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@ -190,6 +191,12 @@ public final class LoaderDispatcher {
final String protocol = url.getProtocol();
final String host = url.getHost();
// check if url is in blacklist
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
// check if we have the page in the cache
final CrawlProfile crawlProfile = this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
if (crawlProfile != null && cacheStrategy != CacheStrategy.NOCACHE) {
@ -324,7 +331,7 @@ public final class LoaderDispatcher {
*/
public byte[] loadContent(final Request request, final CacheStrategy cacheStrategy) throws IOException {
// try to download the resource using the loader
final Response entry = load(request, cacheStrategy, false);
final Response entry = load(request, cacheStrategy, true);
if (entry == null) return null; // not found in web
// read resource body (if it is there)
@ -334,7 +341,7 @@ public final class LoaderDispatcher {
public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int timeout, final int maxFileSize) throws IOException, Parser.Failure {
// load resource
final Response response = load(request, cacheStrategy, maxFileSize, false);
final Response response = load(request, cacheStrategy, maxFileSize, true);
final DigestURI url = request.url();
if (response == null) throw new IOException("no Response for url " + url);
@ -347,7 +354,7 @@ public final class LoaderDispatcher {
public ContentScraper parseResource(final DigestURI location, final CacheStrategy cachePolicy) throws IOException {
// load page
final Response r = this.load(request(location, true, false), cachePolicy, false);
final Response r = this.load(request(location, true, false), cachePolicy, true);
final byte[] page = (r == null) ? null : r.getContent();
if (page == null) throw new IOException("no response from url " + location.toString());
@ -366,7 +373,7 @@ public final class LoaderDispatcher {
* @throws IOException
*/
public final Map<MultiProtocolURI, String> loadLinks(final DigestURI url, final CacheStrategy cacheStrategy) throws IOException {
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, false);
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, true);
if (response == null) throw new IOException("response == null");
final ResponseHeader responseHeader = response.getResponseHeader();
if (response.getContent() == null) throw new IOException("resource == null");
@ -428,4 +435,4 @@ public final class LoaderDispatcher {
} catch (final MalformedURLException e) {} catch (final IOException e) {}
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -278,8 +278,8 @@ public final class SwitchboardConstants {
public static final String CLUSTER_MODE = "cluster.mode";
public static final String CLUSTER_MODE_PUBLIC_CLUSTER = "publiccluster";
public static final String CLUSTER_MODE_PRIVATE_CLUSTER = "privatecluster";
public static final String CLUSTER_MODE_PUBLIC_PEER = "publicpeer";
public static final String CLUSTER_MODE_PRIVATE_PEER = "privatepeer";
public static final String CLUSTER_PEERS_IPPORT = "cluster.peers.ipport";
public static final String DHT_BURST_ROBINSON = "network.unit.dht.burst.robinson";

View File

@ -93,6 +93,7 @@ public final class RWIProcess extends Thread
private final ReferenceOrder order;
private final long startTime;
private boolean addRunning;
private boolean fresh;
// navigation scores
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
@ -136,6 +137,7 @@ public final class RWIProcess extends Thread
this.maxExpectedRemoteReferences = new AtomicInteger(0);
this.expectedRemoteReferences = new AtomicInteger(0);
this.receivedRemoteReferences = new AtomicInteger(0);
this.fresh = true;
}
public void addExpectedRemoteReferences(int x) {
@ -388,10 +390,11 @@ public final class RWIProcess extends Thread
/**
 * Increments the feeder counter by one and clears the fresh flag.
 */
public void oneFeederStarted() {
// count the feeder before clearing the flag: the fresh-aware check in feedingIsFinished()
// is "!fresh && feeders <= 0", so the opposite order could let it report "finished"
// in the moment between the two statements
this.feeders.addAndGet(1);
// NOTE(review): fresh is a plain boolean field written here and read in feedingIsFinished();
// confirm visibility if the two are called from different threads
this.fresh = false;
}
/**
 * Tells whether feeding is over: the fresh flag must have been cleared
 * (oneFeederStarted() does that) and the feeder counter must be zero or below.
 * NOTE(review): this commit view lists the replaced return line followed by its
 * replacement; the description above refers to the second one, which adds the
 * check on fresh.
 *
 * @return true if the fresh flag is cleared and the feeder counter is not positive
 */
public boolean feedingIsFinished() {
return this.feeders.get() <= 0;
return !this.fresh && this.feeders.get() <= 0;
}
private boolean testFlags(final WordReference ientry) {

View File

@ -27,6 +27,7 @@ package net.yacy.search.snippet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -48,7 +49,10 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.ZURL.FailCategory;
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
@ -165,6 +169,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
entry = i.next();
url = new DigestURI(entry.getKey());
desc = entry.getValue();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
@ -189,6 +194,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
ientry = i.next();
url = new DigestURI(ientry.url());
final String u = url.toString();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -230,4 +236,27 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return remaininghashes;
}
/**
 * Checks whether the given URL is listed in the blacklist of the given type.
 * A listed URL is additionally pushed to the switchboard's error-URL queue
 * and the rejection is logged at fine level.
 *
 * @param url the URL to check
 * @param blacklistType type of blacklist (see class Blacklist, BLACKLIST_FOO)
 * @return true if the URL is blacklisted; false otherwise, including when
 *         the URL has no host part
 */
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) {
    // the blacklist lookup needs a host name; a URL without one is treated as
    // not listed instead of failing with a NullPointerException
    final String host = url.getHost();
    if (host == null) {
        return false;
    }

    if (!Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
        return false;
    }

    // record the rejection in the error-URL queue and leave a trace in the log
    final Switchboard sb = Switchboard.getSwitchboard();
    sb.crawlQueues.errorURL.push(new Request(url, null), sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
    Log.logFine("snippet fetch", "MEDIA-SNIPPET Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
    return true;
}
}