Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

Conflicts:
	source/net/yacy/search/Switchboard.java
This commit is contained in:
Michael Peter Christen 2012-01-05 18:37:46 +01:00
commit 2ee8cbeb2c
71 changed files with 4040 additions and 1625 deletions

23
bin/checkalive.sh Executable file
View File

@ -0,0 +1,23 @@
#!/bin/bash
# checkalive.sh - watchdog that restarts YaCy when its status page stops
# answering with HTTP 200.
#
# Install by adding this line to /etc/crontab (runs hourly as user "yacy"):
# 0 * * * * yacy cd /home/yacy/production/bin && ./checkalive.sh
#
# Usage: ./checkalive.sh [status-url]
#   status-url  page to probe; defaults to the local peer's Status.html

STATUS_URL="${1:-http://localhost:8090/Status.html}"

# Probe the page without downloading it. The explicit timeout and retry limit
# keep the check itself from hanging when the peer accepts the connection but
# never answers, which is exactly the situation this script has to detect.
RESULT=$(wget --spider --tries=2 --timeout=30 "$STATUS_URL" 2>&1)

# The peer counts as alive when wget's report contains the status code 200 as
# a whitespace-delimited word. Matching with grep avoids unquoted word
# splitting, which would also glob-expand tokens such as "[following]".
FLAG=0
if printf '%s\n' "$RESULT" | grep -Eq '(^|[[:space:]])200([[:space:]]|$)'; then
FLAG=1
fi

if [ "$FLAG" -eq 0 ]; then
# The start/stop scripts live one level above this script's directory.
# Abort if that directory cannot be entered, so nothing below runs (and
# nothing is deleted) in the wrong place.
cd "$(dirname "$0")/.." || exit 1
# Ask for a clean shutdown first, but do not wait longer than 30 seconds.
timeout 30s ./stopYACY.sh
# Then force-kill whatever is left and clear the stale "running" flag.
./killYACY.sh
# -f: the flag file may already be gone after a successful shutdown.
rm -f DATA/yacy.running
./startYACY.sh
fi
exit

View File

@ -41,9 +41,13 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/** draw a banner with information about the peer */
public class Banner {
public class Banner
{
public static RasterPlotter respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws IOException {
public static RasterPlotter respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) throws IOException {
final Switchboard sb = (Switchboard) env;
final String IMAGE = "htroot/env/grafics/yacy.png";
int width = 468;
@ -106,10 +110,45 @@ public class Banner {
if ( !NetworkGraph.logoIsLoaded() ) {
ImageIO.setUseCache(false); // do not write a cache to disc; keep in RAM
final BufferedImage logo = ImageIO.read(new File(IMAGE));
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm, logo);
return NetworkGraph.getBannerPicture(
1000,
width,
height,
bgcolor,
textcolor,
bordercolor,
name,
links,
words,
type,
myppm,
network,
peers,
nlinks,
nwords,
nqph,
nppm,
logo);
}
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm);
return NetworkGraph.getBannerPicture(
1000,
width,
height,
bgcolor,
textcolor,
bordercolor,
name,
links,
words,
type,
myppm,
network,
peers,
nlinks,
nwords,
nqph,
nppm);
}
}

0
htroot/Collage.html Executable file → Normal file
View File

0
htroot/Collage.java Executable file → Normal file
View File

View File

@ -163,23 +163,9 @@
</dt>
<dd>Your search engine will not contact any other peer, and will reject every request.
</dd>
<!-- not yet implemented
<dt>
<label for="cluster.modePrivatecluster">Private Cluster</label>
<input type="radio" value="privatecluster" id="cluster.modePrivatecluster" name="cluster.mode"
#(privateclusterChecked)#::checked="checked" #(/privateclusterChecked)#/>
</dt>
<dd>
Your peer is part of a private cluster without public visibility.<br />
Index data is not distributed, but remote crawl requests are distributed and accepted from your cluster.<br />
Search requests are spread over all peers of the cluster, and answered from all peers of the cluster.<br />
List of ip:port - addresses of the cluster: (comma-separated)<br />
<input type="text" name="cluster.peers.ipport" value="#[cluster.peers.ipport]#" size="80" maxlength="800" />
</dd>
-->
<dt>
<label for="cluster.modePublicpeer">Public Peer</label>
<input type="radio" value="publicpeer" id="cluster.modePublicpeer" name="cluster.mode"
<label for="publicpeer">Public Peer</label>
<input type="radio" value="publicpeer" id="publicpeer" name="cluster.mode"
#(publicpeerChecked)#::checked="checked" #(/publicpeerChecked)#/>
</dt>
<dd>
@ -187,8 +173,8 @@
Your peer does not accept any outside index data, but responds on all remote search requests.
</dd>
<dt>
<label for="cluster.modePubliccluster">Public Cluster</label>
<input type="radio" value="publiccluster" id="cluster.modePubliccluster" name="cluster.mode"
<label for="publiccluster">Public Cluster</label>
<input type="radio" value="publiccluster" id="publiccluster" name="cluster.mode"
#(publicclusterChecked)#::checked="checked" #(/publicclusterChecked)#/>
</dt>
<dd>

View File

@ -40,25 +40,35 @@ import de.anomic.data.WorkTables;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class ConfigNetwork_p {
public class ConfigNetwork_p
{
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws FileNotFoundException, IOException {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) throws FileNotFoundException, IOException {
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
int commit = 0;
// load all options for network definitions
final File networkBootstrapLocationsFile = new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
final File networkBootstrapLocationsFile =
new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
final Set<String> networkBootstrapLocations = FileUtils.loadList(networkBootstrapLocationsFile);
if ( post != null ) {
// store this call as api call
sb.tables.recordAPICall(post, "ConfigNetwork_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "network settings");
sb.tables.recordAPICall(
post,
"ConfigNetwork_p.html",
WorkTables.TABLE_API_TYPE_CONFIGURATION,
"network settings");
if ( post.containsKey("changeNetwork") ) {
final String networkDefinition = post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
final String networkDefinition =
post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
if ( networkDefinition.equals(sb.getConfig("network.unit.definition", "")) ) {
// no change
commit = 3;
@ -74,6 +84,12 @@ public class ConfigNetwork_p {
// DHT control
boolean indexDistribute = "on".equals(post.get("indexDistribute", ""));
boolean indexReceive = "on".equals(post.get("indexReceive", ""));
if ( !indexReceive ) {
// remove heuristics
sb.setConfig("heuristic.site", false);
sb.setConfig("heuristic.scroogle", false);
sb.setConfig("heuristic.blekko", false);
}
final boolean robinsonmode = "robinson".equals(post.get("network", ""));
if ( robinsonmode ) {
indexDistribute = false;
@ -127,12 +143,17 @@ public class ConfigNetwork_p {
}
if ( post.containsKey("peertags") ) {
sb.peers.mySeed().setPeerTags(MapTools.string2set(normalizedList(post.get("peertags")), ","));
sb.peers.mySeed().setPeerTags(
MapTools.string2set(normalizedList(post.get("peertags")), ","));
}
sb.setConfig("cluster.mode", post.get("cluster.mode", "publicpeer"));
sb.setConfig("cluster.mode", post.get(
SwitchboardConstants.CLUSTER_MODE,
SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER));
sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", "")));
sb.setConfig("cluster.peers.yacydomain", checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
sb.setConfig(
"cluster.peers.yacydomain",
checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
// update the cluster hash set
sb.clusterhashes = sb.peers.clusterHashes(sb.getConfig("cluster.peers.yacydomain", ""));
@ -144,20 +165,34 @@ public class ConfigNetwork_p {
// write remote crawl request settings
prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? "1" : "0");
final long RTCbusySleep = Math.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
final long RTCbusySleep =
Math
.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
final int RTCppm = (int) (60000L / RTCbusySleep);
prop.put("acceptCrawlLimit", RTCppm);
final boolean indexDistribute = sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW, true);
final boolean indexReceive = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
prop.put("indexDistributeChecked", (indexDistribute) ? "1" : "0");
prop.put("indexDistributeWhileCrawling.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
prop.put("indexDistributeWhileCrawling.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
prop.put("indexDistributeWhileIndexing.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
prop.put("indexDistributeWhileIndexing.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
prop.put(
"indexDistributeWhileCrawling.on",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
prop.put(
"indexDistributeWhileCrawling.off",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
prop.put(
"indexDistributeWhileIndexing.on",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
prop.put(
"indexDistributeWhileIndexing.off",
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
prop.put("indexReceiveChecked", (indexReceive) ? "1" : "0");
prop.put("indexReceiveBlockBlacklistChecked.on", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
prop.put("indexReceiveBlockBlacklistChecked.off", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
prop.put(
"indexReceiveBlockBlacklistChecked.on",
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
prop.put(
"indexReceiveBlockBlacklistChecked.off",
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
prop.putHTML("peertags", MapTools.set2string(sb.peers.mySeed().getPeerTags(), ",", false));
// set seed information directly
@ -180,10 +215,15 @@ public class ConfigNetwork_p {
prop.put("cluster.peers.yacydomain.hashes", hashes.toString());
// set p2p mode flags
prop.put("privatepeerChecked", ("privatepeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("privateclusterChecked", ("privatecluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("publicclusterChecked", ("publiccluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put("publicpeerChecked", ("publicpeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
prop.put(
"privatepeerChecked",
(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
prop.put(
"publicclusterChecked",
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
prop.put(
"publicpeerChecked",
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
// set network configuration
prop.putHTML("network.unit.definition", sb.getConfig("network.unit.definition", ""));
@ -218,8 +258,10 @@ public class ConfigNetwork_p {
final String[] array = normalizedList(input).split(",");
final StringBuilder output = new StringBuilder();
for ( final String element : array ) {
if ((element.endsWith(".yacyh")) || (element.endsWith(".yacy")) ||
(element.indexOf(".yacyh=",0) > 0) || (element.indexOf(".yacy=",0) > 0)) {
if ( (element.endsWith(".yacyh"))
|| (element.endsWith(".yacy"))
|| (element.indexOf(".yacyh=", 0) > 0)
|| (element.indexOf(".yacy=", 0) > 0) ) {
output.append(",").append(element);
}
}

0
htroot/IndexCleaner_p.html Executable file → Normal file
View File

0
htroot/IndexCleaner_p.java Executable file → Normal file
View File

View File

@ -70,6 +70,12 @@
</dd>
#(/urgentStatusVirgin)#
#(hintStatusPrivate)#::
<dt class="hintIcon"><img src="env/grafics/bad.png" width="32" height="32" alt="idea"/></dt>
<dd class="hint">Your network configuration is in private mode. Your peer seed will not be published.
</dd>
#(/hintStatusPrivate)#
<!-- warnings -->
#(warningGoOnline)#::

View File

@ -46,25 +46,31 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Status {
public class Status
{
private static final String SEEDSERVER = "seedServer";
private static final String PEERSTATUS = "peerStatus";
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) {
// return variable that accumulates replacements
final serverObjects prop = new serverObjects();
final Switchboard sb = (Switchboard) env;
// check if the basic configuration was accessed before and forward
prop.put("forwardToConfigBasic", 0);
if ((post == null || !post.containsKey("noforward")) &&
sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html",0) < 0 &&
Seed.isDefaultPeerName(sb.peers.mySeed().getName())) {
if ( (post == null || !post.containsKey("noforward"))
&& sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html", 0) < 0
&& Seed.isDefaultPeerName(sb.peers.mySeed().getName()) ) {
// forward to ConfigBasic
prop.put("forwardToConfigBasic", 1);
}
if (post != null) post.remove("noforward");
if ( post != null ) {
post.remove("noforward");
}
if ( post != null && post.size() > 0 ) {
if ( sb.adminAuthenticated(header) < 2 ) {
@ -123,7 +129,8 @@ public class Status {
}
// password protection
if ((sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) {
if ( (sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0)
&& (!sb.getConfigBool("adminAccountForLocalhost", false)) ) {
prop.put("protection", "0"); // not protected
prop.put("urgentSetPassword", "1");
} else {
@ -142,7 +149,8 @@ public class Status {
prop.put("warningDiskSpaceLow_minSpace", minFree);
}
if ( !sb.observer.getMemoryAvailable() ) {
final String minFree = Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
final String minFree =
Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
prop.put("warningMemoryLow", "1");
prop.put("warningMemoryLow_minSpace", minFree);
}
@ -151,7 +159,8 @@ public class Status {
// version information
//final String versionstring = yacyVersion.combined2prettyVersion(sb.getConfig("version","0.1"));
final String versionstring = yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
final String versionstring =
yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
prop.put("versionpp", versionstring);
// place some more hints
@ -200,7 +209,9 @@ public class Status {
prop.put("peerStatistics", "1");
prop.put("peerStatistics_uptime", PeerActions.formatInterval(uptime));
prop.putNum("peerStatistics_pagesperminute", sb.peers.mySeed().getPPM());
prop.putNum("peerStatistics_queriesperhour", Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
prop.putNum(
"peerStatistics_queriesperhour",
Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
prop.putNum("peerStatistics_links", sb.peers.mySeed().getLinkCount());
prop.put("peerStatistics_words", Formatter.number(sb.peers.mySeed().getWordCount()));
prop.putNum("peerStatistics_disconnects", sb.peers.peerActions.disconnects);
@ -215,11 +226,19 @@ public class Status {
prop.putXML("peerAddress_peername", sb.peers.mySeed().getName().toLowerCase());
}
}
final String peerStatus = ((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN));
if (Seed.PEERTYPE_VIRGIN.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
final String peerStatus =
((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(
Seed.PEERTYPE,
Seed.PEERTYPE_VIRGIN));
if ( Seed.PEERTYPE_VIRGIN.equals(peerStatus)
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
prop.put(PEERSTATUS, "0");
prop.put("urgentStatusVirgin", "1");
} else if (Seed.PEERTYPE_JUNIOR.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
} else if ( Seed.PEERTYPE_JUNIOR.equals(peerStatus)
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
prop.put(PEERSTATUS, "1");
prop.put("warningStatusJunior", "1");
} else if ( Seed.PEERTYPE_SENIOR.equals(peerStatus) ) {
@ -234,9 +253,10 @@ public class Status {
prop.put("hash", thisHash);
final String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
if (!"none".equalsIgnoreCase(seedUploadMethod) ||
("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 ||
sb.getConfig("seedFilePath", "").length() > 0))) {
if ( !"none".equalsIgnoreCase(seedUploadMethod)
|| ("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 || sb
.getConfig("seedFilePath", "")
.length() > 0)) ) {
if ( "".equals(seedUploadMethod) ) {
if ( sb.getConfig("seedFTPPassword", "").length() > 0 ) {
sb.setConfig("seedUploadMethod", "Ftp");
@ -256,7 +276,8 @@ public class Status {
prop.put(SEEDSERVER, "2"); // enabled
prop.putHTML("seedServer_seedFile", sb.getConfig("seedFilePath", ""));
}
prop.put("seedServer_lastUpload",
prop.put(
"seedServer_lastUpload",
PeerActions.formatInterval(System.currentTimeMillis() - sb.peers.lastSeedUpload_timeStamp));
} else {
prop.put(SEEDSERVER, "0"); // disabled
@ -307,11 +328,19 @@ public class Status {
prop.putNum("loaderQueueMax", loaderMaxCount);
prop.put("loaderQueuePercent", (loaderPercent > 100) ? 100 : loaderPercent);
prop.putNum("localCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
prop.put("localCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? "1" : "0");
prop.putNum("localCrawlQueueSize", sb
.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
.getJobCount());
prop.put("localCrawlPaused", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
? "1"
: "0");
prop.putNum("remoteTriggeredCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
prop.put("remoteTriggeredCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
prop.putNum(
"remoteTriggeredCrawlQueueSize",
sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
prop.put(
"remoteTriggeredCrawlPaused",
sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
prop.putNum("stackCrawlQueueSize", sb.crawlStacker.size());

0
htroot/api/bookmarks/posts/add_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/all.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/delete_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/posts/get.java Executable file → Normal file
View File

0
htroot/api/bookmarks/tags/editTag_p.java Executable file → Normal file
View File

0
htroot/api/bookmarks/tags/getTag.java Executable file → Normal file
View File

0
htroot/api/bookmarks/xbel/xbel.java Executable file → Normal file
View File

0
htroot/api/feed.java Executable file → Normal file
View File

0
htroot/api/getpageinfo_p.java Executable file → Normal file
View File

0
htroot/api/ynetSearch.java Executable file → Normal file
View File

0
htroot/compare_yacy.html Executable file → Normal file
View File

0
htroot/compare_yacy.java Executable file → Normal file
View File

0
htroot/processing/domaingraph/applet/domaingraph.java Executable file → Normal file
View File

0
htroot/processing/domaingraph/applet/index.html Executable file → Normal file
View File

0
htroot/rssTerminal.html Executable file → Normal file
View File

0
htroot/terminal_p.html Executable file → Normal file
View File

View File

@ -81,13 +81,18 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
public class yacysearch {
public class yacysearch
{
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
public static serverObjects respond(
final RequestHeader header,
final serverObjects post,
final serverSwitch env) {
final Switchboard sb = (Switchboard) env;
sb.localSearchLastAccess = System.currentTimeMillis();
final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
final boolean searchAllowed =
sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
boolean authenticated = sb.adminAuthenticated(header) >= 2;
if ( !authenticated ) {
@ -96,15 +101,17 @@ public class yacysearch {
}
final boolean localhostAccess = sb.accessFromLocalhost(header);
final String promoteSearchPageGreeting =
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ?
env.getConfig("network.unit.description", "") :
env.getConfig(SwitchboardConstants.GREETING, "");
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ? env.getConfig(
"network.unit.description",
"") : env.getConfig(SwitchboardConstants.GREETING, "");
final String client = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
// get query
final String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
final String originalquerystring =
(post == null) ? "" : post.get("query", post.get("search", "")).trim();
String querystring = originalquerystring.replace('+', ' ').replace('*', ' ').trim();
CacheStrategy snippetFetchStrategy = (post == null) ? null : CacheStrategy.parse(post.get("verify", "cacheonly"));
CacheStrategy snippetFetchStrategy =
(post == null) ? null : CacheStrategy.parse(post.get("verify", "cacheonly"));
final servletProperties prop = new servletProperties();
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
@ -124,8 +131,12 @@ public class yacysearch {
final boolean rss = EXT.equals("rss");
final boolean json = EXT.equals("json");
prop.put("promoteSearchPageGreeting", promoteSearchPageGreeting);
prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
prop.put("promoteSearchPageGreeting.smallImage", sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
prop.put(
"promoteSearchPageGreeting.homepage",
sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
prop.put(
"promoteSearchPageGreeting.smallImage",
sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
if ( post == null || indexSegment == null || env == null || !searchAllowed ) {
// we create empty entries for template strings
prop.put("searchagain", "0");
@ -140,8 +151,12 @@ public class yacysearch {
prop.put("constraint", "");
prop.put("cat", "href");
prop.put("depth", "0");
prop.put("search.verify", (post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh"));
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put(
"search.verify",
(post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh"));
prop.put(
"search.navigation",
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put("contentdom", "text");
prop.put("contentdomCheckText", "1");
prop.put("contentdomCheckAudio", "0");
@ -180,7 +195,14 @@ public class yacysearch {
// collect search attributes
int maximumRecords = Math.min((authenticated) ? (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 100 : 5000) : (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 20 : 1000), post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
int maximumRecords =
Math.min(
(authenticated)
? (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline()
? 100
: 5000) : (snippetFetchStrategy != null
&& snippetFetchStrategy.isAllowedToFetchOnline() ? 20 : 1000),
post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
int startRecord = post.getInt("startRecord", post.getInt("offset", 0));
boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0;
@ -198,15 +220,21 @@ public class yacysearch {
prefermask = ".*" + prefermask + ".*";
}
Bitfield constraint = (post != null && post.containsKey("constraint") && !post.get("constraint", "").isEmpty()) ? new Bitfield(4, post.get("constraint", "______")) : null;
Bitfield constraint =
(post != null && post.containsKey("constraint") && !post.get("constraint", "").isEmpty())
? new Bitfield(4, post.get("constraint", "______"))
: null;
if ( indexof ) {
constraint = new Bitfield(4);
constraint.set(Condenser.flag_cat_indexof, true);
}
// SEARCH
final boolean clustersearch = sb.isRobinsonMode() && (sb.getConfig("cluster.mode", "").equals("privatecluster") || sb.getConfig("cluster.mode", "").equals("publiccluster"));
final boolean indexReceiveGranted = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true) || sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true) || clustersearch;
final boolean clustersearch = sb.isRobinsonMode() && sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER);
final boolean indexReceiveGranted =
sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true)
|| sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true)
|| clustersearch;
global = global && indexReceiveGranted; // if the user does not want indexes from remote peers, it cannot be a global searchnn
// increase search statistic counter
@ -225,13 +253,18 @@ public class yacysearch {
}
// find search domain
final ContentDomain contentdom = ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
final ContentDomain contentdom =
ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
// patch until better search profiles are available
if ( contentdom == ContentDomain.TEXT ) {
if (maximumRecords > 50 && maximumRecords < 100) maximumRecords = 10;
if ( maximumRecords > 50 && maximumRecords < 100 ) {
maximumRecords = 10;
}
} else {
if (maximumRecords <= 32) maximumRecords = 64;
if ( maximumRecords <= 32 ) {
maximumRecords = 64;
}
}
// check the search tracker
@ -246,33 +279,67 @@ public class yacysearch {
snippetFetchStrategy = null;
}
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM "
+ client
+ " gets no permission to search");
} else if ( Domains.matchesList(client, sb.networkWhitelist) ) {
Log.logInfo("LOCAL_SEARCH", "ACCESS CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
Log.logInfo("LOCAL_SEARCH", "ACCESS CONTROL: WHITELISTED CLIENT FROM "
+ client
+ " gets no search restrictions");
} else if ( !authenticated && !localhostAccess ) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
synchronized ( trackerHandles ) {
final int accInThreeSeconds = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size();
final int accInOneMinute = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size();
final int accInTenMinutes = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size();
final int accInThreeSeconds =
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size();
final int accInOneMinute =
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size();
final int accInTenMinutes =
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size();
// protections against too strong YaCy network load, reduces remote search
if ( global ) {
if ( accInTenMinutes >= 60 || accInOneMinute >= 6 || accInThreeSeconds >= 1 ) {
global = false;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed global search");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
+ client
+ ": "
+ accInThreeSeconds
+ "/3s, "
+ accInOneMinute
+ "/60s, "
+ accInTenMinutes
+ "/600s, "
+ " requests, disallowed global search");
}
}
// protection against too many remote server snippet loads (protects traffic on server)
if ( snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ) {
if ( accInTenMinutes >= 20 || accInOneMinute >= 4 || accInThreeSeconds >= 1 ) {
snippetFetchStrategy = CacheStrategy.CACHEONLY;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed remote snippet loading");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
+ client
+ ": "
+ accInThreeSeconds
+ "/3s, "
+ accInOneMinute
+ "/60s, "
+ accInTenMinutes
+ "/600s, "
+ " requests, disallowed remote snippet loading");
}
}
// general load protection
if ( accInTenMinutes >= 3000 || accInOneMinute >= 600 || accInThreeSeconds >= 60 ) {
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed search");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
+ client
+ ": "
+ accInThreeSeconds
+ "/3s, "
+ accInOneMinute
+ "/60s, "
+ accInTenMinutes
+ "/600s, "
+ " requests, disallowed search");
}
}
}
@ -363,7 +430,9 @@ public class yacysearch {
}
String ft = querystring.substring(filetype + 9, ftb);
querystring = querystring.replace("filetype:" + ft, "");
while (!ft.isEmpty() && ft.charAt(0) == '.') ft = ft.substring(1);
while ( !ft.isEmpty() && ft.charAt(0) == '.' ) {
ft = ft.substring(1);
}
if ( !ft.isEmpty() ) {
if ( urlmask == null ) {
urlmask = ".*\\." + ft;
@ -382,7 +451,9 @@ public class yacysearch {
if ( tenant != null ) {
if ( urlmask == null ) {
urlmask = ".*" + tenant + ".*";
} else urlmask = ".*" + tenant + urlmask;
} else {
urlmask = ".*" + tenant + urlmask;
}
}
}
final int site = querystring.indexOf("site:", 0);
@ -484,7 +555,8 @@ public class yacysearch {
}
// navigation
final String navigation = (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
final String navigation =
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
// the query
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
@ -515,7 +587,10 @@ public class yacysearch {
map.put("urlhash", delHash);
map.put("vote", "negative");
map.put("refid", "");
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_SURFTIPP_VOTE_ADD, map);
sb.peers.newsPool.publishMyNews(
sb.peers.mySeed(),
NewsPool.CATEGORY_SURFTIPP_VOTE_ADD,
map);
}
// delete the search history since this still shows the entry
@ -536,7 +611,12 @@ public class yacysearch {
if ( urlentry != null ) {
Document[] documents = null;
try {
documents = sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE);
documents =
sb.loader.loadDocuments(
sb.loader.request(urlentry.url(), true, false),
CacheStrategy.IFEXIST,
5000,
Integer.MAX_VALUE);
} catch ( final IOException e ) {
} catch ( final Parser.Failure e ) {
}
@ -548,7 +628,10 @@ public class yacysearch {
map.put("description", documents[0].dc_title().replace(',', ' '));
map.put("author", documents[0].dc_creator());
map.put("tags", documents[0].dc_subject(' '));
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_SURFTIPP_ADD, map);
sb.peers.newsPool.publishMyNews(
sb.peers.mySeed(),
NewsPool.CATEGORY_SURFTIPP_ADD,
map);
documents[0].close();
}
}
@ -564,7 +647,13 @@ public class yacysearch {
final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash));
if ( urlentry != null ) {
try {
sb.tables.bookmarks.createBookmark(sb.loader, urlentry.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search");
sb.tables.bookmarks.createBookmark(
sb.loader,
urlentry.url(),
YMarkTables.USER_ADMIN,
true,
"searchresult",
"/search");
} catch ( final Throwable e ) {
}
}
@ -593,7 +682,8 @@ public class yacysearch {
prefermask = "";
}
final QueryParams theQuery = new QueryParams(
final QueryParams theQuery =
new QueryParams(
originalquerystring,
queryHashes,
Word.words2hashesHandles(query[1]),
@ -610,8 +700,9 @@ public class yacysearch {
maximumRecords,
startRecord,
urlmask,
clustersearch && global ? QueryParams.Searchdom.CLUSTER :
(global && indexReceiveGranted ? QueryParams.Searchdom.GLOBAL : QueryParams.Searchdom.LOCAL),
clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted
? QueryParams.Searchdom.GLOBAL
: QueryParams.Searchdom.LOCAL),
20,
constraint,
true,
@ -624,9 +715,16 @@ public class yacysearch {
indexSegment,
ranking,
header.get(RequestHeader.USER_AGENT, ""),
sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false) && sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && sb.peers.mySeed().getFlagAcceptRemoteIndex());
sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false)
&& sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false)
&& sb.peers.mySeed().getFlagAcceptRemoteIndex());
EventTracker.delete(EventTracker.EClass.SEARCH);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEvent.Type.INITIALIZATION, "", 0, 0), false);
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
theQuery.id(true),
SearchEvent.Type.INITIALIZATION,
"",
0,
0), false);
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(3000);
@ -635,8 +733,19 @@ public class yacysearch {
theQuery.filterOut(Switchboard.blueList);
// log
Log.logInfo("LOCAL_SEARCH", "INIT WORD SEARCH: " + theQuery.queryString + ":" + QueryParams.hashSet2hashString(theQuery.queryHashes) + " - " + theQuery.neededResults() + " links to be computed, " + theQuery.displayResults() + " lines to be displayed");
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(new RSSMessage("Local Search Request", theQuery.queryString, ""));
Log.logInfo(
"LOCAL_SEARCH",
"INIT WORD SEARCH: "
+ theQuery.queryString
+ ":"
+ QueryParams.hashSet2hashString(theQuery.queryHashes)
+ " - "
+ theQuery.neededResults()
+ " links to be computed, "
+ theQuery.displayResults()
+ " lines to be displayed");
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(
new RSSMessage("Local Search Request", theQuery.queryString, ""));
final long timestamp = System.currentTimeMillis();
// create a new search event
@ -644,10 +753,20 @@ public class yacysearch {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
startRecord = 0;
}
final SearchEvent theSearch = SearchEventCache.getEvent(
theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader,
(int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 10)),
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)),
final SearchEvent theSearch =
SearchEventCache.getEvent(
theQuery,
sb.peers,
sb.tables,
(sb.isRobinsonMode()) ? sb.clusterhashes : null,
false,
sb.loader,
(int) sb.getConfigLong(
SwitchboardConstants.REMOTESEARCH_MAXCOUNT_USER,
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 10)),
sb.getConfigLong(
SwitchboardConstants.REMOTESEARCH_MAXTIME_USER,
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)),
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0),
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
@ -655,7 +774,8 @@ public class yacysearch {
if ( sitehost != null && sb.getConfigBool("heuristic.site", false) && authenticated ) {
sb.heuristicSite(theSearch, sitehost);
}
if ((heuristicScroogle >= 0 || sb.getConfigBool("heuristic.scroogle", false)) && authenticated) {
if ( (heuristicScroogle >= 0 || sb.getConfigBool("heuristic.scroogle", false))
&& authenticated ) {
sb.heuristicScroogle(theSearch);
}
if ( (heuristicBlekko >= 0 || sb.getConfigBool("heuristic.blekko", false)) && authenticated ) {
@ -664,15 +784,30 @@ public class yacysearch {
}
// log
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
"local-unfiltered(" + theSearch.getRankingResult().getLocalIndexCount() + "), " +
"local_miss(" + theSearch.getRankingResult().getMissCount() + "), " +
"local_sortout(" + theSearch.getRankingResult().getSortOutCount() + "), " +
"remote(" + theSearch.getRankingResult().getRemoteResourceSize() + ") links found, " +
(System.currentTimeMillis() - timestamp) + " ms");
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: "
+ theQuery.queryString
+ " - "
+ "local-unfiltered("
+ theSearch.getRankingResult().getLocalIndexCount()
+ "), "
+ "local_miss("
+ theSearch.getRankingResult().getMissCount()
+ "), "
+ "local_sortout("
+ theSearch.getRankingResult().getSortOutCount()
+ "), "
+ "remote("
+ theSearch.getRankingResult().getRemoteResourceSize()
+ ") links found, "
+ (System.currentTimeMillis() - timestamp)
+ " ms");
// prepare search statistics
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
theQuery.resultcount =
theSearch.getRankingResult().getLocalIndexCount()
- theSearch.getRankingResult().getMissCount()
- theSearch.getRankingResult().getSortOutCount()
+ theSearch.getRankingResult().getRemoteIndexCount();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
@ -683,16 +818,23 @@ public class yacysearch {
prop.put("meanCount", meanMax);
if ( meanMax > 0 && !json && !rss ) {
final DidYouMean didYouMean = new DidYouMean(indexSegment.termIndex(), new StringBuilder(querystring));
final DidYouMean didYouMean =
new DidYouMean(indexSegment.termIndex(), new StringBuilder(querystring));
final Iterator<StringBuilder> meanIt = didYouMean.getSuggestions(100, 5).iterator();
int meanCount = 0;
String suggestion;
while ( meanCount < meanMax && meanIt.hasNext() ) {
suggestion = meanIt.next().toString();
prop.put("didYouMean_suggestions_" + meanCount + "_word", suggestion);
prop.put("didYouMean_suggestions_"+meanCount+"_url",
QueryParams.navurl("html", 0, theQuery, suggestion, originalUrlMask.toString(), theQuery.navigators).toString()
);
prop.put(
"didYouMean_suggestions_" + meanCount + "_url",
QueryParams.navurl(
"html",
0,
theQuery,
suggestion,
originalUrlMask.toString(),
theQuery.navigators).toString());
prop.put("didYouMean_suggestions_" + meanCount + "_sep", "|");
meanCount++;
}
@ -714,7 +856,9 @@ public class yacysearch {
prop.put("geoinfo_loc_" + i + "_lat", Math.round(c.lat() * 10000.0f) / 10000.0f);
prop.put("geoinfo_loc_" + i + "_name", c.getName());
i++;
if (i >= 10) break;
if ( i >= 10 ) {
break;
}
}
prop.put("geoinfo_loc", i);
prop.put("geoinfo", "1");
@ -725,39 +869,71 @@ public class yacysearch {
synchronized ( trackerHandles ) {
trackerHandles.add(theQuery.time);
while ( trackerHandles.size() > 600 ) {
if (!trackerHandles.remove(trackerHandles.first())) break;
if ( !trackerHandles.remove(trackerHandles.first()) ) {
break;
}
}
}
sb.localSearchTracker.put(client, trackerHandles);
if ( sb.localSearchTracker.size() > 100 ) {
sb.localSearchTracker.remove(sb.localSearchTracker.keys().nextElement());
}
if (MemoryControl.shortStatus()) sb.localSearchTracker.clear();
if ( MemoryControl.shortStatus() ) {
sb.localSearchTracker.clear();
}
} catch ( final Exception e ) {
Log.logException(e);
}
final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
final int indexcount =
theSearch.getRankingResult().getLocalIndexCount()
- theSearch.getRankingResult().getMissCount()
- theSearch.getRankingResult().getSortOutCount()
+ theSearch.getRankingResult().getRemoteIndexCount();
prop.put("num-results_offset", startRecord == 0 ? 0 : startRecord + 1);
prop.put("num-results_itemscount", Formatter.number(startRecord + theSearch.getQuery().itemsPerPage > indexcount ? startRecord + indexcount % theSearch.getQuery().itemsPerPage : startRecord + theSearch.getQuery().itemsPerPage, true));
prop.put("num-results_itemscount", Formatter.number(
startRecord + theSearch.getQuery().itemsPerPage > indexcount ? startRecord
+ indexcount
% theSearch.getQuery().itemsPerPage : startRecord + theSearch.getQuery().itemsPerPage,
true));
prop.put("num-results_itemsPerPage", maximumRecords);
prop.put("num-results_totalcount", Formatter.number(indexcount, true));
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0");
prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
prop.put("num-results_globalresults_localMissCount", Formatter.number(theSearch.getRankingResult().getMissCount(), true));
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.getRankingResult().getRemotePeerCount(), true));
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch)
? "1"
: "0");
prop.put(
"num-results_globalresults_localResourceSize",
Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
prop.put(
"num-results_globalresults_localMissCount",
Formatter.number(theSearch.getRankingResult().getMissCount(), true));
prop.put(
"num-results_globalresults_remoteResourceSize",
Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
prop.put(
"num-results_globalresults_remoteIndexCount",
Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
prop.put(
"num-results_globalresults_remotePeerCount",
Formatter.number(theSearch.getRankingResult().getRemotePeerCount(), true));
// compose page navigation
final StringBuilder resnav = new StringBuilder(200);
final int thispage = startRecord / theQuery.displayResults();
if ( thispage == 0 ) {
resnav.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" />&nbsp;");
resnav
.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" />&nbsp;");
} else {
resnav.append("<a id=\"prevpage\" href=\"");
resnav.append(QueryParams.navurl("html", thispage - 1, theQuery, null, originalUrlMask, navigation).toString());
resnav.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;");
resnav.append(QueryParams.navurl(
"html",
thispage - 1,
theQuery,
null,
originalUrlMask,
navigation).toString());
resnav
.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a>&nbsp;");
}
final int numberofpages = Math.min(10, 1 + ((indexcount - 1) / theQuery.displayResults()));
@ -770,7 +946,9 @@ public class yacysearch {
resnav.append("\" width=\"16\" height=\"16\" />&nbsp;");
} else {
resnav.append("<a href=\"");
resnav.append(QueryParams.navurl("html", i, theQuery, null, originalUrlMask, navigation).toString());
resnav.append(QueryParams
.navurl("html", i, theQuery, null, originalUrlMask, navigation)
.toString());
resnav.append("\"><img src=\"env/grafics/navd");
resnav.append(i + 1);
resnav.append(".gif\" alt=\"page");
@ -779,11 +957,19 @@ public class yacysearch {
}
}
if ( thispage >= numberofpages ) {
resnav.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
resnav
.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
} else {
resnav.append("<a id=\"nextpage\" href=\"");
resnav.append(QueryParams.navurl("html", thispage + 1, theQuery, null, originalUrlMask, navigation).toString());
resnav.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
resnav.append(QueryParams.navurl(
"html",
thispage + 1,
theQuery,
null,
originalUrlMask,
navigation).toString());
resnav
.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
}
final String resnavs = resnav.toString();
prop.put("num-results_resnav", resnavs);
@ -796,7 +982,12 @@ public class yacysearch {
prop.put("results_" + i + "_eventID", theQuery.id(false));
}
prop.put("results", theQuery.displayResults());
prop.put("resultTable", (contentdom == ContentDomain.APP || contentdom == ContentDomain.AUDIO || contentdom == ContentDomain.VIDEO) ? 1 : 0);
prop
.put(
"resultTable",
(contentdom == ContentDomain.APP || contentdom == ContentDomain.AUDIO || contentdom == ContentDomain.VIDEO)
? 1
: 0);
prop.put("eventID", theQuery.id(false)); // for bottomline
// process result of search
@ -838,10 +1029,20 @@ public class yacysearch {
prop.putHTML("prefermaskfilter", prefermask);
prop.put("indexof", (indexof) ? "on" : "off");
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());
prop.put("search.verify", snippetFetchStrategy == null ? sb.getConfig("search.verify", "iffresh") : snippetFetchStrategy.toName());
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put("search.verify", snippetFetchStrategy == null
? sb.getConfig("search.verify", "iffresh")
: snippetFetchStrategy.toName());
prop.put(
"search.navigation",
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text")));
prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0);
prop.put(
"searchdomswitches",
sb.getConfigBool("search.text", true)
|| sb.getConfigBool("search.audio", true)
|| sb.getConfigBool("search.video", true)
|| sb.getConfigBool("search.image", true)
|| sb.getConfigBool("search.app", true) ? 1 : 0);
prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0);
prop.put("searchdomswitches_searchaudio", sb.getConfigBool("search.audio", true) ? 1 : 0);
prop.put("searchdomswitches_searchvideo", sb.getConfigBool("search.video", true) ? 1 : 0);

319
skins/28c3.css Normal file
View File

@ -0,0 +1,319 @@
/* 28c3 skin (dark colour scheme) */
/*
The following colours must be defined:
#000000
#A3CC8B
#38A535
#E08040
#333333
#222222
#FFCCCC
#888888
#990000
#009900
#000099
#FFFFFF
#008000
#800000
*/
body {
background-color:#000000;
color:#A3CC8B;
}
a:link {
color:#A3CC8B;
background-color:transparent;
}
a:link:hover {
color: #38A535;
background-color:transparent;
}
/* Menu */
.menugroup h3 {
-webkit-border-top-left-radius: 5px;
-webkit-border-top-right-radius: 5px;
-khtml-border-top-left-radius: 5px;
-khtml-border-top-right-radius: 5px;
-moz-border-radius-topleft: 5px;
-moz-border-radius-topright: 5px;
border-top-left-radius: 5px;
border-top-right-radius: 5px;
background-color: #E08040;
color:white;
}
.SubMenugroup h3, .SubMenu h3 {
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
background-color:#E08040;
color:white;
}
a.MenuItemLink, ul.SubMenu em {
background-color:#333333;
color:#A3CC8B;
}
a:hover.MenuItemLink {
background-color:transparent;
color:#A3CC8B;
}
/* Head */
div.head h1 {
background-color:transparent;
text-align:left;
padding-left:70px;
color:#A3CC8B;
}
/* Tables */
table {
}
.TableHeader {
background-color: #38A535;
color:white;
}
.TableCellDark {
background-color: #333333;
}
.TableCellLight {
background-color: #222222;
}
.TableCellSummary {
background-color: #FFCCCC;
border:1px solid #888888;
}
.TableCellActive {
background-color: #FFCCCC;
}
/* Blog and Wiki*/
.Post {
background-color:#000000;
}
.PostSubject {
background-color:#000000;
}
.PostSubject a {
color:#A3CC8B;
}
.PostInfo {
background-color:#000000;
}
/* Wiki */
.WikiTOCBox {
border: 1px solid #888888;
background-color: #000000;
}
a.unknown {
color:#990000;
}
a.known {
color:#009900;
}
a.extern {
color:#000099;
}
/* in Bookmarks */
.bookmark {
border-bottom:1px #888888 dashed;
}
a.bookmarkTitle {
color: #E08040;
}
a:hover.bookmarkTitle {
color: #E08040;
}
a.bookmarkTags {
color: #FFFFFF;
}
a:hover.bookmarkTags {
color: #E08040;
}
a.bookmarkAction {
color: #888888;
}
.Tags {
border-left: 2px solid #A3CC8B;
}
.diff { background-color: #000000; }
.diff .unchanged { color: #000099; }
.diff .added { color: #009900; background-color: #000000; }
.diff .deleted { color: #990000; background-color: #000000; }
/* in Status.html */
.ProgressBar {
border: #000000 solid 1px;
}
div.ProgressBarFill {
background-color:#333333;
}
/* Copyright info */
div#api {
position:absolute;
top:3px;
right:20px;
z-index: 100;
}
div#yacylivesearch {
float:right;
margin-right: 90px;
margin-top: -24px;
}
/* Searchresults */
fieldset.maininput, fieldset.yacys {
background-color:#333333;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
}
form.search.small h2 {
border-bottom:1px solid #888888;
}
.searchresults {
/*border-top:1px #888888 dashed;*/
}
.searchresults h4 a {
font-size:1.2em;
font-weight:normal;
text-decoration:underline;
color:#FFFFFF;
}
.searchresults h4 a:link:hover {
color:#FFFFFF;
}
.snippetLoaded strong {
color:black;
}
.searchresults .url a {
color:#008000;
}
.searchresults .url a:link:hover {
color:#800000;
}
/* other */
.settingsValue {
color:#000099;
}
.Headline {
background-color: #E08040;
color: white;
}
.Heading {
background-color: #E08040;
}
.error, .warning {
color:red;
}
.success {
color:green;
}
.Message {
background-color: #000000;
}
.example {
background-color:#000099;
}
.hides:hover .hoverShow { background-color: #000000; }
/* Log */
body#ViewLog pre {
background-color:white;
}
/* Forms */
fieldset {
background-color:#333333;
color:#A3CC8B;
border:0px solid #333333;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
}
/* input, select, textarea, button {
color: black;
background-color: white;
} */
legend {
background-color:#38A535;
-webkit-border-radius: 5px;
-khtml-border-radius: 5px;
-moz-border-radius: 5px;
border-radius: 5px;
text-align:left;
color:white;
}
form dt, dl.pairs dt {
background-color:#333333;
font-weight:bold;
}
form dd, dl.pairs dd {
background-color:#333333;
}

View File

@ -142,11 +142,19 @@ public class CrawlQueues {
* @return if the hash exists, the name of the database is returned, otherwise null is returned
*/
public String urlExists(final byte[] hash) {
    // probe every URL store exactly once, in a fixed order; the first store
    // that knows the hash determines the returned name
    if (this.delegatedURL.exists(hash)) {
        return "delegated";
    }
    if (this.errorURL.exists(hash)) {
        return "errors";
    }
    if (this.noticeURL.existsInStack(hash)) {
        return "crawler";
    }
    // finally compare against the requests currently held by the loader workers
    for (final Loader worker: this.workers.values()) {
        if (Base64Order.enhancedCoder.equal(worker.request.url().hash(), hash)) {
            return "worker";
        }
    }
    // the hash is not known anywhere
    return null;
}
@ -159,16 +167,26 @@ public class CrawlQueues {
public DigestURI getURL(final byte[] urlhash) {
    assert urlhash != null;
    // with assertions disabled a missing or empty hash simply yields "not found"
    if (urlhash == null || urlhash.length == 0) {
        return null;
    }
    // look the hash up once per store: delegated URLs first, then error URLs
    ZURL.Entry ee = this.delegatedURL.get(urlhash);
    if (ee != null) {
        return ee.url();
    }
    ee = this.errorURL.get(urlhash);
    if (ee != null) {
        return ee.url();
    }
    // then the requests currently held by the loader workers
    for (final Loader w: this.workers.values()) {
        if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) {
            return w.request.url();
        }
    }
    // last resort: the notice-URL stacks
    final Request ne = this.noticeURL.get(urlhash);
    if (ne != null) {
        return ne.url();
    }
    return null;
}
@ -176,7 +194,9 @@ public class CrawlQueues {
// wait for all workers to finish
final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
for (final Loader w: this.workers.values()) {
if (w.age() > timeout) w.interrupt();
if (w.age() > timeout) {
w.interrupt();
}
}
}
@ -185,7 +205,9 @@ public class CrawlQueues {
final Request[] e = new Request[this.workers.size()];
int i = 0;
for (final Loader w: this.workers.values()) {
if (i >= e.length) break;
if (i >= e.length) {
break;
}
e[i++] = w.request;
}
return e;
@ -197,10 +219,8 @@ public class CrawlQueues {
}
public boolean coreCrawlJob() {
final boolean robinsonPrivateCase = (this.sb.isRobinsonMode() &&
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER) &&
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_CLUSTER));
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER));
if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) {
// move some tasks to the core crawl job so we have something to do
@ -216,12 +236,16 @@ public class CrawlQueues {
final String queueCheckCore = loadIsPossible(NoticedURL.StackType.CORE);
final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD);
if (queueCheckCore != null && queueCheckNoload != null) {
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
if (this.log.isFine()) {
this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
}
return false;
}
if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: paused");
if (this.log.isFine()) {
this.log.logFine("omitting de-queue/local: paused");
}
return false;
}
@ -238,7 +262,9 @@ public class CrawlQueues {
if (this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
// get one entry that will not be loaded, just indexed
urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
if (urlEntry == null) continue;
if (urlEntry == null) {
continue;
}
final String profileHandle = urlEntry.profileHandle();
if (profileHandle == null) {
this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
@ -259,7 +285,9 @@ public class CrawlQueues {
}
urlEntry = this.noticeURL.pop(NoticedURL.StackType.CORE, true, this.sb.crawler);
if (urlEntry == null) continue;
if (urlEntry == null) {
continue;
}
final String profileHandle = urlEntry.profileHandle();
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
@ -271,7 +299,9 @@ public class CrawlQueues {
return true;
} catch (final IOException e) {
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.CORE);
if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.CORE);
}
}
}
return true;
@ -293,7 +323,7 @@ public class CrawlQueues {
final DigestURI url = urlEntry.url();
final String urlProtocol = url.getProtocol();
if (this.sb.loader.isSupportedProtocol(urlProtocol)) {
if (this.log.isFine())
if (this.log.isFine()) {
this.log.logFine(stats + ": URL=" + urlEntry.url()
+ ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator()))
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
@ -302,6 +332,7 @@ public class CrawlQueues {
+ ", must-match=" + profile.urlMustMatchPattern().toString()
+ ", must-not-match=" + profile.urlMustNotMatchPattern().toString()
+ ", permission=" + ((this.sb.peers == null) ? "undefined" : (((this.sb.peers.mySeed().isSenior()) || (this.sb.peers.mySeed().isPrincipal())) ? "true" : "false")));
}
// work off one Crawl stack entry
if (urlEntry == null || urlEntry.url() == null) {
@ -387,23 +418,31 @@ public class CrawlQueues {
}
// check again
if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
}
return false;
}
final String cautionCause = this.sb.onlineCaution();
if (cautionCause != null) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
}
return false;
}
if (remoteTriggeredCrawlJobSize() > 200) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
}
return false;
}
if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) {
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
if (this.log.isFine()) {
this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
}
return false;
}
@ -414,27 +453,37 @@ public class CrawlQueues {
final Iterator<Seed> e = PeerSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
while (e.hasNext()) {
seed = e.next();
if (seed != null) this.remoteCrawlProviderHashes.add(seed.hash);
if (seed != null) {
this.remoteCrawlProviderHashes.add(seed.hash);
}
}
}
if (this.remoteCrawlProviderHashes.isEmpty()) return false;
}
if (this.remoteCrawlProviderHashes.isEmpty()) {
return false;
}
// take one entry from the provider list and load the entries from the remote peer
seed = null;
String hash = null;
while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
if (hash == null) continue;
if (hash == null) {
continue;
}
seed = this.sb.peers.get(hash);
if (seed == null) continue;
if (seed == null) {
continue;
}
// check if the peer is inside our cluster
if ((this.sb.isRobinsonMode()) && (!this.sb.isInMyCluster(seed))) {
seed = null;
continue;
}
}
if (seed == null) return false;
if (seed == null) {
return false;
}
// we know a peer which should provide remote crawl entries. load them now.
final RSSFeed feed = Protocol.queryRemoteCrawlURLs(this.sb.peers, seed, 60, 8000);
@ -467,7 +516,9 @@ public class CrawlQueues {
final String urlRejectReason = this.sb.crawlStacker.urlInAcceptedDomain(url);
if (urlRejectReason == null) {
// stack url
if (this.sb.getLog().isFinest()) this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
if (this.sb.getLog().isFinest()) {
this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
}
this.sb.crawlStacker.enqueueEntry(new Request(
ASCII.getBytes(hash),
url,
@ -514,12 +565,16 @@ public class CrawlQueues {
// or there is no global crawl on the stack
final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE);
if (queueCheck != null) {
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: " + queueCheck);
if (this.log.isFinest()) {
this.log.logFinest("omitting de-queue/remote: " + queueCheck);
}
return false;
}
if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: paused");
if (this.log.isFinest()) {
this.log.logFinest("omitting de-queue/remote: paused");
}
return false;
}
@ -536,7 +591,9 @@ public class CrawlQueues {
return true;
} catch (final IOException e) {
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.REMOTE);
if (e.getMessage().indexOf("hash is null",0) > 0) {
this.noticeURL.clear(NoticedURL.StackType.REMOTE);
}
return true;
}
}
@ -603,7 +660,9 @@ public class CrawlQueues {
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), true);
if (response == null) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
if (CrawlQueues.this.log.isFine()) {
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
}
result = "no content (possibly caused by cache policy)";
} else {
this.request.setStatus("loaded", WorkflowJob.STATUS_RUNNING);
@ -613,7 +672,9 @@ public class CrawlQueues {
}
} catch (final IOException e) {
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
if (CrawlQueues.this.log.isFine()) {
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
}
result = "load error - " + e.getMessage();
}
@ -646,7 +707,5 @@ public class CrawlQueues {
assert w != null;
}
}
}
}

0
source/de/anomic/crawler/NoticedURL.java Executable file → Normal file
View File

0
source/de/anomic/crawler/ResultImages.java Executable file → Normal file
View File

0
source/de/anomic/crawler/ZURL.java Executable file → Normal file
View File

0
source/de/anomic/crawler/retrieval/Request.java Executable file → Normal file
View File

0
source/de/anomic/crawler/retrieval/Response.java Executable file → Normal file
View File

View File

@ -48,12 +48,15 @@ public class DidYouMean {
private static final char[] ALPHABET_KANJI = new char[512];
static {
    // this is very experimental: a very small subset of Kanji
    // slots 0..255 receive the character block U+3400..U+34FF
    for (char a = '\u3400'; a <= '\u34ff'; a++) {
        ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
    }
    // slots 256..511 receive the character block U+4E00..U+4EFF.
    // The mask must be parenthesized: '+' binds tighter than '&', so the
    // unparenthesized form masked the offset away again and overwrote
    // slots 0..255 while leaving the upper half of the table empty.
    for (char a = '\u4e00'; a <= '\u4eff'; a++) {
        ALPHABET_KANJI[(0xff & (a - '\u4e00')) + 256] = a;
    }
}
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static char[] alphabet = ALPHABET_LATIN;
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
@ -66,6 +69,7 @@ public class DidYouMean {
private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written
private final SortedSet<StringBuilder> resultSet;
private final indexSizeComparator INDEX_SIZE_COMPARATOR;
private char[] alphabet;
/**
@ -88,25 +92,31 @@ public class DidYouMean {
boolean alphafound = false;
alphatest: for (final char[] alpha: ALPHABETS) {
if (isAlphabet(alpha, testchar)) {
alphabet = alpha;
// take a private copy of the alphabet that matched the test character;
// the copy must read from 'alpha' itself: reading alpha.length characters
// from ALPHABET_LATIN would yield the wrong characters for every other
// alphabet and overruns the source array whenever ALPHABET_LATIN is shorter
this.alphabet = new char[alpha.length];
System.arraycopy(alpha, 0, this.alphabet, 0, alpha.length);
alphafound = true;
break alphatest;
}
}
if (!alphafound) {
// generate generic alphabet using simply a character block of 256 characters
final char firstchar = (char) ((0xff & (testchar / 256)) * 256);
final char lastchar = (char) (firstchar + 255);
alphabet = new char[256];
for (char a = firstchar; a <= lastchar; a++) {
alphabet[0xff & (a - firstchar)] = a;
final int firstchar = (0xff & (testchar / 256)) * 256;
final int lastchar = firstchar + 255;
this.alphabet = new char[256];
// test this with /suggest.json?q=%EF%BD%84
for (int a = firstchar; a <= lastchar; a++) {
this.alphabet[0xff & (a - firstchar)] = (char) a;
}
}
}
}
private static final boolean isAlphabet(final char[] alpha, final char testchar) {
for (final char a: alpha) if (a == testchar) return true;
for (final char a: alpha) {
if (a == testchar) {
return true;
}
}
return false;
}
@ -125,10 +135,15 @@ public class DidYouMean {
* @return
*/
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short
if (this.word.length() < MinimumInputWordLength)
{
return this.resultSet; // return nothing if input is too short
}
final long startTime = System.currentTimeMillis();
final long timelimit = startTime + timeout;
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) {
return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
}
final SortedSet<StringBuilder> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
@ -138,8 +153,12 @@ public class DidYouMean {
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
for (final StringBuilder s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
if (System.currentTimeMillis() > timelimit) {
break;
}
if (!(scored.sizeSmaller(2 * preSortSelection))) {
break;
}
scored.inc(s, this.index.count(Word.word2hash(s)));
}
final SortedSet<StringBuilder> countSorted = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
@ -147,8 +166,12 @@ public class DidYouMean {
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
final StringBuilder s = scored.getMaxKey();
final int score = scored.delete(s);
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);
if (System.currentTimeMillis() > timelimit) break;
if (s.length() >= MinimumOutputWordLength && score > wc) {
countSorted.add(s);
}
if (System.currentTimeMillis() > timelimit) {
break;
}
}
// finished
@ -180,11 +203,19 @@ public class DidYouMean {
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
StringBuilder sb;
for (int i = 0; i < words.length; i++) {
if (s[i].isEmpty()) continue;
if (s[i].isEmpty()) {
continue;
}
sb = new StringBuilder(20);
for (int j = 0; j < words.length; j++) {
if (j > 0) sb.append(' ');
if (i == j) sb.append(s[j].first()); else sb.append(words[j]);
if (j > 0) {
sb.append(' ');
}
if (i == j) {
sb.append(s[j].first());
} else {
sb.append(words[j]);
}
}
result.add(sb);
}
@ -211,11 +242,13 @@ public class DidYouMean {
// get a single recommendation for the word without altering the word
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(this.word);
for (final StringBuilder t: libr) {
if (!t.equals(this.word)) try {
if (!t.equals(this.word)) {
try {
this.createGen = false;
this.guessLib.put(t);
} catch (final InterruptedException e) {}
}
}
// create and start producers
// the CPU load to create the guessed words is very low, but the testing
@ -226,34 +259,46 @@ public class DidYouMean {
producers[1] = new AddingOneLetter();
producers[2] = new DeletingOneLetter();
producers[3] = new ReversingTwoConsecutiveLetters();
for (final Thread t: producers) t.start();
for (final Thread t: producers) {
t.start();
}
// start more consumers if there are more cores
if (consumers.length > 1) for (int i = 1; i < consumers.length; i++) {
if (consumers.length > 1) {
for (int i = 1; i < consumers.length; i++) {
consumers[i] = new Consumer();
consumers[i].start();
}
}
// now decide which kind of guess is better
// we take guessLib entries as long as there is any entry in it
// to see if this is the case, we must wait for termination of the producer
for (final Thread t: producers) try { t.join(); } catch (final InterruptedException e) {}
for (final Thread t: producers) {
try { t.join(); } catch (final InterruptedException e) {}
}
// if there is not any entry in guessLib, then transfer all entries from the
// guessGen to guessLib
if (this.createGen) try {
if (this.createGen) {
try {
this.guessGen.put(POISON_STRING);
StringBuilder s;
while (!(s = this.guessGen.take()).equals(POISON_STRING)) this.guessLib.put(s);
while (!(s = this.guessGen.take()).equals(POISON_STRING)) {
this.guessLib.put(s);
}
} catch (final InterruptedException e) {}
}
// put poison into guessLib to terminate consumers
for (@SuppressWarnings("unused") final Consumer c: consumers)
for (@SuppressWarnings("unused") final Consumer c: consumers) {
try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {}
}
// wait for termination of consumer
for (final Consumer c: consumers)
for (final Consumer c: consumers) {
try { c.join(); } catch (final InterruptedException e) {}
}
// we don't want the given word in the result
this.resultSet.remove(this.word);
@ -265,7 +310,9 @@ public class DidYouMean {
private void test(final StringBuilder s) throws InterruptedException {
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) this.createGen = false;
if (!libr.isEmpty()) {
this.createGen = false;
}
for (final StringBuilder t: libr) {
this.guessLib.put(t);
}
@ -284,18 +331,22 @@ public class DidYouMean {
@Override
public void run() {
char m;
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
try {
m = DidYouMean.this.word.charAt(i);
for (final char c: alphabet) {
for (final char c: DidYouMean.this.alphabet) {
if (m != c) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
test(ts);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
}
}
/**
* DidYouMean's producer thread that deletes extra letters (e.g. frog/fog) for a given term
@ -306,12 +357,16 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
} catch (final InterruptedException e) {}
}
}
}
@ -324,15 +379,19 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i <= DidYouMean.this.wordLen; i++) try {
for (final char c: alphabet) {
for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
try {
for (final char c: DidYouMean.this.alphabet) {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
}
}
/**
* DidYouMean's producer thread that reverses any two consecutive letters (e.g. two/tow) for a given term
@ -343,12 +402,16 @@ public class DidYouMean {
@Override
public void run() {
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) try {
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
try {
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
test(ts);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
} catch (final InterruptedException e) {}
}
}
}
@ -364,8 +427,12 @@ public class DidYouMean {
StringBuilder s;
try {
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) DidYouMean.this.resultSet.add(s);
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) {
DidYouMean.this.resultSet.add(s);
}
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
return;
}
}
} catch (final InterruptedException e) {}
}
@ -377,10 +444,13 @@ public class DidYouMean {
*/
private class indexSizeComparator implements Comparator<StringBuilder> {
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final int i1 = DidYouMean.this.index.count(Word.word2hash(o1));
final int i2 = DidYouMean.this.index.count(Word.word2hash(o2));
if (i1 == i2) return WORD_LENGTH_COMPARATOR.compare(o1, o2);
if (i1 == i2) {
return WORD_LENGTH_COMPARATOR.compare(o1, o2);
}
return (i1 < i2) ? 1 : -1; // '<' is correct, because the largest count shall be ordered to be the first position in the result
}
}
@ -391,10 +461,13 @@ public class DidYouMean {
*/
private static class wordLengthComparator implements Comparator<StringBuilder> {
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final int i1 = o1.length();
final int i2 = o2.length();
if (i1 == i2) return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
if (i1 == i2) {
return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
}
return (i1 < i2) ? 1 : -1; // '<' is correct, because the longest word shall be first
}
@ -411,10 +484,13 @@ public class DidYouMean {
this.secondaryComparator = secondaryComparator;
}
@Override
public int compare(final StringBuilder o1, final StringBuilder o2) {
final boolean o1m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o1, this.head);
final boolean o2m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o2, this.head);
if ((o1m && o2m) || (!o1m && !o2m)) return this.secondaryComparator.compare(o1, o2);
if ((o1m && o2m) || (!o1m && !o2m)) {
return this.secondaryComparator.compare(o1, o2);
}
return o1m ? -1 : 1;
}
}

0
source/net/yacy/ai/example/ConnectFour.java Executable file → Normal file
View File

0
source/net/yacy/ai/example/testorder.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/AbstractFinding.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/AbstractModel.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Agent.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Asset.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Attempts.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Battle.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Challenge.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Context.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Engine.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Finding.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Goal.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Model.java Executable file → Normal file
View File

0
source/net/yacy/ai/greedy/Role.java Executable file → Normal file
View File

0
source/net/yacy/cora/protocol/RequestHeader.java Executable file → Normal file
View File

0
source/net/yacy/cora/protocol/ResponseHeader.java Executable file → Normal file
View File

42
source/net/yacy/kelondro/blob/ArrayStack.java Executable file → Normal file
View File

@ -215,12 +215,14 @@ public class ArrayStack implements BLOB {
}
}
/**
 * Sums the memory reported by every blob item held in this stack.
 *
 * @return the total of {@code blob.mem()} over all blob items, or 0 if the blob list is null
 */
@Override
public long mem() {
    if (this.blobs == null) {
        return 0;
    }
    long total = 0;
    for (final blobItem item : this.blobs) {
        total += item.blob.mem();
    }
    return total;
}
@Override
public void trim() {
// trim shall not be called for ArrayStacks because the characteristics of an ArrayStack is that the 'topmost' BLOB on the stack
// is used for write operations and all other shall be trimmed automatically since they are not used for writing. And the
@ -374,6 +376,7 @@ public class ArrayStack implements BLOB {
return new File(this.heapLocation, this.prefix + "." + my_SHORT_MILSEC_FORMATTER.format(creation) + ".blob");
}
/**
 * Returns the name of this stack as reported by {@code heapLocation.getName()}.
 *
 * @return the name of the heap location
 */
@Override
public String name() {
    final String locationName = this.heapLocation.getName();
    return locationName;
}
@ -414,12 +417,14 @@ public class ArrayStack implements BLOB {
/**
 * Returns the size of the repository in bytes, computed as the sum of
 * {@code location.length()} over all blob items.
 *
 * @return the accumulated length of all blob locations
 */
@Override
public synchronized long length() {
    long bytes = 0;
    int idx = 0;
    // index-based walk; the list size is re-read on every step
    while (idx < this.blobs.size()) {
        bytes += this.blobs.get(idx).location.length();
        idx++;
    }
    return bytes;
}
/**
 * Returns the {@code ordering} field of this stack.
 *
 * @return the byte order stored in this instance
 */
@Override
public ByteOrder ordering() {
    final ByteOrder order = this.ordering;
    return order;
}
@ -446,6 +451,7 @@ public class ArrayStack implements BLOB {
* ask for the length of the primary key
* @return the length of the key
*/
@Override
public int keylength() {
    // the key length is a plain field of this stack; no computation involved
    final int primaryKeyLength = this.keylength;
    return primaryKeyLength;
}
@ -454,6 +460,7 @@ public class ArrayStack implements BLOB {
* clears the content of the database
* @throws IOException
*/
@Override
public synchronized void clear() throws IOException {
for (final blobItem bi: this.blobs) {
bi.blob.clear();
@ -467,12 +474,14 @@ public class ArrayStack implements BLOB {
* ask for the number of blob entries
* @return the number of entries in the table
*/
@Override
public synchronized int size() {
    // accumulate the entry counts of all blobs on the stack
    int entries = 0;
    final Iterator<blobItem> it = this.blobs.iterator();
    while (it.hasNext()) {
        entries += it.next().blob.size();
    }
    return entries;
}
@Override
public synchronized boolean isEmpty() {
for (final blobItem bi: this.blobs) if (!bi.blob.isEmpty()) return false;
return true;
@ -497,6 +506,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
assert rotating == false;
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
@ -514,6 +524,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
final Iterator<blobItem> i = this.blobs.iterator();
@ -529,6 +540,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public synchronized boolean containsKey(final byte[] key) {
final blobItem bi = keeperOf(key);
return bi != null;
@ -550,14 +562,27 @@ public class ArrayStack implements BLOB {
return null;
}
// first check the current blob only because that has most probably the key if any has that key
int bs1 = this.blobs.size() - 1;
blobItem bi = this.blobs.get(bs1);
if (bi.blob.containsKey(key)) return bi;
if (this.blobs.size() == 2) {
// this should not be done concurrently
bi = this.blobs.get(0);
if (bi.blob.containsKey(key)) return bi;
return null;
}
// start a concurrent query to database tables
final CompletionService<blobItem> cs = new ExecutorCompletionService<blobItem>(this.executor);
int accepted = 0;
for (final blobItem bi : this.blobs) {
for (int i = 0; i < bs1; i++) {
final blobItem b = this.blobs.get(i);
try {
cs.submit(new Callable<blobItem>() {
@Override
public blobItem call() {
if (bi.blob.containsKey(key)) return bi;
if (b.blob.containsKey(key)) return b;
return null;
}
});
@ -565,7 +590,7 @@ public class ArrayStack implements BLOB {
} catch (final RejectedExecutionException e) {
// the executor is either shutting down or the blocking queue is full
// execute the search direct here without concurrency
if (bi.blob.containsKey(key)) return bi;
if (b.blob.containsKey(key)) return b;
}
}
@ -599,6 +624,7 @@ public class ArrayStack implements BLOB {
* @return
* @throws IOException
*/
@Override
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
if (this.blobs.size() == 0) return null;
if (this.blobs.size() == 1) {
@ -619,6 +645,7 @@ public class ArrayStack implements BLOB {
*/
}
@Override
public byte[] get(final Object key) {
if (!(key instanceof byte[])) return null;
try {
@ -652,6 +679,7 @@ public class ArrayStack implements BLOB {
this.key = key;
}
@Override
protected byte[] next0() {
while (this.bii.hasNext()) {
final BLOB b = this.bii.next().blob;
@ -677,6 +705,7 @@ public class ArrayStack implements BLOB {
* @return the size of the BLOB or -1 if the BLOB does not exist
* @throws IOException
*/
@Override
public synchronized long length(final byte[] key) throws IOException {
long l;
for (final blobItem bi: this.blobs) {
@ -707,6 +736,7 @@ public class ArrayStack implements BLOB {
this.key = key;
}
@Override
protected Long next0() {
while (this.bii.hasNext()) {
final BLOB b = this.bii.next().blob;
@ -744,6 +774,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized void insert(final byte[] key, final byte[] b) throws IOException {
blobItem bi = (this.blobs.isEmpty()) ? null : this.blobs.get(this.blobs.size() - 1);
/*
@ -770,6 +801,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
int d = 0;
for (final blobItem bi: this.blobs) {
@ -784,6 +816,7 @@ public class ArrayStack implements BLOB {
* @throws IOException
* @throws RowSpaceExceededException
*/
@Override
public synchronized int reduce(final byte[] key, final Reducer reduce) throws IOException, RowSpaceExceededException {
int d = 0;
for (final blobItem bi: this.blobs) {
@ -797,6 +830,7 @@ public class ArrayStack implements BLOB {
* @param key the primary key
* @throws IOException
*/
@Override
public synchronized void delete(final byte[] key) throws IOException {
final long m = mem();
if (this.blobs.size() == 0) {
@ -812,6 +846,7 @@ public class ArrayStack implements BLOB {
// run this in a concurrent thread
final blobItem bi0 = bi;
t[i] = new Thread() {
@Override
public void run() {
try { bi0.blob.delete(key); } catch (final IOException e) {}
}
@ -831,6 +866,7 @@ public class ArrayStack implements BLOB {
/**
* close the BLOB
*/
@Override
public synchronized void close(final boolean writeIDX) {
for (final blobItem bi: this.blobs) bi.blob.close(writeIDX);
this.blobs.clear();

0
source/net/yacy/kelondro/blob/Heap.java Executable file → Normal file
View File

View File

@ -0,0 +1,540 @@
package net.yacy.kelondro.data.meta;
import net.yacy.cora.lod.Node;
import net.yacy.cora.lod.vocabulary.Rdf;
import net.yacy.kelondro.data.word.WordReferenceVars;
public class URIMetadataNode /*implements URIMetadata*/ {
private final Node entry;
private final String snippet;
private final WordReferenceVars word; // this is only used if the url is transported via remote search requests
private final long ranking; // during generation of a search result this value is set
/**
 * Creates a dummy entry, good to produce poison objects: the node is an
 * {@code Rdf.Description} node, snippet and word are null and the ranking is 0.
 */
public URIMetadataNode() {
    // no search context is attached to a dummy entry
    this.snippet = null;
    this.word = null;
    this.ranking = 0;
    this.entry = new Node(Rdf.Description);
}
/*
public URIMetadataNode(
final DigestURI url,
final String dc_title,
final String dc_creator,
final String dc_subject,
final String dc_publisher,
final float lon, final float lat, // decimal degrees as in WGS84; if unknown both values may be 0.0f;
final Date mod,
final Date load,
final Date fresh,
final String referrer,
final byte[] md5,
final long size,
final int wc,
final char dt,
final Bitfield flags,
final byte[] lang,
final int llocal,
final int lother,
final int laudio,
final int limage,
final int lvideo,
final int lapp) {
// create new entry
this.entry = new Node();
this.entry.setSubject(UTF8.getBytes(url.toNormalform(true, false)));
this.entry.setObject(YaCyMetadata.hash, url.hash());
this.entry.setObject(DublinCore.Title, UTF8.getBytes(dc_title));
this.entry.setObject(DublinCore.Creator, UTF8.getBytes(dc_creator));
this.entry.setObject(DublinCore.Subject, UTF8.getBytes(dc_subject));
this.entry.setObject(DublinCore.Publisher, UTF8.getBytes(dc_publisher));
this.entry.setObject(Geo.Lat, ASCII.getBytes(Float.toString(lat)));
this.entry.setObject(Geo.Long, ASCII.getBytes(Float.toString(lon)));
encodeDate(col_mod, mod);
encodeDate(col_load, load);
encodeDate(col_fresh, fresh);
this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer));
this.entry.setCol(col_md5, md5);
this.entry.setCol(col_size, size);
this.entry.setCol(col_wc, wc);
this.entry.setCol(col_dt, new byte[]{(byte) dt});
this.entry.setCol(col_flags, flags.bytes());
this.entry.setCol(col_lang, lang);
this.entry.setCol(col_llocal, llocal);
this.entry.setCol(col_lother, lother);
this.entry.setCol(col_limage, limage);
this.entry.setCol(col_laudio, laudio);
this.entry.setCol(col_lvideo, lvideo);
this.entry.setCol(col_lapp, lapp);
//System.out.println("===DEBUG=== " + load.toString() + ", " + decodeDate(col_load).toString());
this.snippet = null;
this.word = null;
this.ranking = 0;
this.comp = null;
}
private byte[] encodeDate(final Date d) {
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
// 86400000 is the number of milliseconds in one day
return NaturalOrder.encodeLong(d.getTime() / 86400000L, 4);
}
private Date decodeDate(final int col) {
final long t = this.entry.getColLong(col);
}
public static byte[] encodeComp(
final DigestURI url,
final String dc_title,
final String dc_creator,
final String dc_subject,
final String dc_publisher,
final float lat,
final float lon) {
final CharBuffer s = new CharBuffer(360);
s.append(url.toNormalform(false, true)).appendLF();
s.append(dc_title).appendLF();
if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
s.appendLF();
if (dc_subject.length() > 120) s.append(dc_subject, 0, 120); else s.append(dc_subject);
s.appendLF();
if (dc_publisher.length() > 80) s.append(dc_publisher, 0, 80); else s.append(dc_publisher);
s.appendLF();
if (lon == 0.0f && lat == 0.0f) s.appendLF(); else s.append(Float.toString(lat)).append(',').append(Float.toString(lon)).appendLF();
return UTF8.getBytes(s.toString());
}
public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
this.entry = entry;
this.snippet = null;
this.word = searchedWord;
this.ranking = ranking;
this.comp = null;
}
public URIMetadataRow(final Properties prop) {
// generates a plasmaLURLEntry using the properties from the argument
// the property names must correspond to the one from toString
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
DigestURI url;
try {
url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), ASCII.getBytes(prop.getProperty("hash")));
} catch (final MalformedURLException e) {
url = null;
}
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
this.entry = rowdef.newEntry();
this.entry.setCol(col_hash, url.hash()); // FIXME potential null pointer access
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
// create new formatters to make concurrency possible
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
encodeDate(col_mod, formatter.parse(prop.getProperty("mod", "20000101")));
} catch (final ParseException e) {
encodeDate(col_mod, new Date());
}
try {
encodeDate(col_load, formatter.parse(prop.getProperty("load", "20000101")));
} catch (final ParseException e) {
encodeDate(col_load, new Date());
}
try {
encodeDate(col_fresh, formatter.parse(prop.getProperty("fresh", "20000101")));
} catch (final ParseException e) {
encodeDate(col_fresh, new Date());
}
this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", "")));
this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
final String dt = prop.getProperty("dt", "t");
this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
final String flags = prop.getProperty("flags", "AAAAAA");
this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk")));
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));
this.entry.setCol(col_laudio, Integer.parseInt(prop.getProperty("laudio", "0")));
this.entry.setCol(col_lvideo, Integer.parseInt(prop.getProperty("lvideo", "0")));
this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
this.word = null;
if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
if (prop.containsKey("wi")) {
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
}
this.ranking = 0;
this.comp = null;
}
public static URIMetadataRow importEntry(final String propStr) {
if (propStr == null || (propStr.length() > 0 && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
return null;
}
try {
return new URIMetadataRow(MapTools.s2p(propStr.substring(1, propStr.length() - 1)));
} catch (final kelondroException e) {
// wrong format
return null;
}
}
private StringBuilder corePropList() {
// generate a parseable string; this is a simple property-list
final Components metadata = metadata();
final StringBuilder s = new StringBuilder(300);
if (metadata == null) return null;
//System.out.println("author=" + comp.author());
// create new formatters to make concurrency possible
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
try {
s.append("hash=").append(ASCII.String(hash()));
assert (s.toString().indexOf(0) < 0);
s.append(",url=").append(crypt.simpleEncode(metadata.url().toNormalform(false, true)));
assert (s.toString().indexOf(0) < 0);
s.append(",descr=").append(crypt.simpleEncode(metadata.dc_title()));
assert (s.toString().indexOf(0) < 0);
s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
assert (s.toString().indexOf(0) < 0);
s.append(",tags=").append(crypt.simpleEncode(metadata.dc_subject()));
assert (s.toString().indexOf(0) < 0);
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
assert (s.toString().indexOf(0) < 0);
s.append(",lat=").append(metadata.lat());
assert (s.toString().indexOf(0) < 0);
s.append(",lon=").append(metadata.lon());
assert (s.toString().indexOf(0) < 0);
s.append(",mod=").append(formatter.format(moddate()));
assert (s.toString().indexOf(0) < 0);
s.append(",load=").append(formatter.format(loaddate()));
assert (s.toString().indexOf(0) < 0);
s.append(",fresh=").append(formatter.format(freshdate()));
assert (s.toString().indexOf(0) < 0);
s.append(",referrer=").append(referrerHash() == null ? "" : ASCII.String(referrerHash()));
assert (s.toString().indexOf(0) < 0);
s.append(",md5=").append(md5());
assert (s.toString().indexOf(0) < 0);
s.append(",size=").append(size());
assert (s.toString().indexOf(0) < 0);
s.append(",wc=").append(wordCount());
assert (s.toString().indexOf(0) < 0);
s.append(",dt=").append(doctype());
assert (s.toString().indexOf(0) < 0);
s.append(",flags=").append(flags().exportB64());
assert (s.toString().indexOf(0) < 0);
s.append(",lang=").append(language() == null ? "EN" : UTF8.String(language()));
assert (s.toString().indexOf(0) < 0);
s.append(",llocal=").append(llocal());
assert (s.toString().indexOf(0) < 0);
s.append(",lother=").append(lother());
assert (s.toString().indexOf(0) < 0);
s.append(",limage=").append(limage());
assert (s.toString().indexOf(0) < 0);
s.append(",laudio=").append(laudio());
assert (s.toString().indexOf(0) < 0);
s.append(",lvideo=").append(lvideo());
assert (s.toString().indexOf(0) < 0);
s.append(",lapp=").append(lapp());
assert (s.toString().indexOf(0) < 0);
if (this.word != null) {
// append also word properties
final String wprop = this.word.toPropertyForm();
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
}
assert (s.toString().indexOf(0) < 0);
return s;
} catch (final Throwable e) {
// serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
// if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
// if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
Log.logException(e);
return null;
}
}
public Row.Entry toRowEntry() {
return this.entry;
}
public byte[] hash() {
// return a url-hash, based on the md5 algorithm
// the result is a String of 12 bytes within a 72-bit space
// (each byte has an 6-bit range)
// that should be enough for all web pages on the world
return this.entry.getPrimaryKeyBytes();
}
public long ranking() {
return this.ranking;
}
public boolean matches(final Pattern matcher) {
return this.metadata().matches(matcher);
}
public DigestURI url() {
return this.metadata().url();
}
public String dc_title() {
return this.metadata().dc_title();
}
public String dc_creator() {
return this.metadata().dc_creator();
}
public String dc_publisher() {
// returns the publisher element of the metadata components (empty string if absent)
return this.metadata().dc_publisher();
}
public String dc_subject() {
// returns the subject (tags) element of the metadata components (empty string if absent)
return this.metadata().dc_subject();
}
public float lat() {
// returns the latitude parsed from the "<latitude>,<longitude>" element of the
// metadata components; 0.0f if that element is empty or contains no comma
return this.metadata().lat();
}
public float lon() {
// returns the longitude parsed from the "<latitude>,<longitude>" element of the
// metadata components; 0.0f if that element is empty or contains no comma
return this.metadata().lon();
}
private Components metadata() {
    // the components are parsed only once and then cached in this.comp
    if (this.comp == null) {
        // the comp column holds up to six elements separated by line feeds (byte 10):
        // url, title, author, tags, publisher, latlon
        final List<byte[]> parts = ByteBuffer.split(this.entry.getColBytes(col_comp, true), (byte) 10);
        final String[] field = new String[6];
        for (int i = 0; i < field.length; i++) {
            // elements missing at the end of the column are replaced by empty strings
            field[i] = (i < parts.size()) ? UTF8.String(parts.get(i)) : "";
        }
        this.comp = new Components(field[0], hash(), field[1], field[2], field[3], field[4], field[5]);
    }
    return this.comp;
}
public Date moddate() {
// decodes the date stored in column col_mod (presumably the modification date of the document - TODO confirm)
return decodeDate(col_mod);
}
public Date loaddate() {
// decodes the date stored in column col_load (presumably the time the document was loaded - TODO confirm)
return decodeDate(col_load);
}
public Date freshdate() {
// decodes the date stored in column col_fresh (presumably the date until which the entry counts as fresh - TODO confirm)
return decodeDate(col_fresh);
}
public byte[] referrerHash() {
    // return the creator's hash or null if there is none
    // FIXME: there seem to be some malformed entries in the databases like "null\0\0\0\0\0\0\0\0";
    // any hash that contains a zero byte is therefore treated as 'no referrer'
    final byte[] referrer = this.entry.getColBytes(col_referrer, true);
    if (referrer == null) {
        return null;
    }
    for (final byte b : referrer) {
        if (b == 0) {
            return null;
        }
    }
    return referrer;
}
public String md5() {
// returns the content of the col_md5 column in hex representation
return Digest.encodeHex(this.entry.getColBytes(col_md5, true));
}
public char doctype() {
// the doctype is stored as a single byte in column col_dt and handed out as a char
return (char) this.entry.getColByte(col_dt);
}
public byte[] language() {
    // returns the language code stored in column col_lang; if that column is missing, empty
    // or starts with '[' the code is derived from the top-level domain of the url instead,
    // with "en" as the last fallback. This method never returns null.
    final byte[] stored = this.entry.getColBytes(col_lang, true);
    if (stored != null && stored.length > 0 && stored[0] != (byte) '[') {
        return stored;
    }
    // use the url() accessor and not the raw field: the field is initialized lazily and is
    // still null when nobody has asked for the url before; url() may also return null
    // if the stored url string is malformed
    final DigestURI u = this.metadata().url();
    final String tld = (u == null) ? null : u.getTLD();
    if (tld == null || tld.length() != 2) {
        // only two-letter top-level domains are accepted as a language hint
        return ASCII.getBytes("en");
    }
    return ASCII.getBytes(tld);
}
public int size() {
// value of column col_size; the stored long is narrowed to int, so values beyond the int range are truncated
return (int) this.entry.getColLong(col_size);
}
public Bitfield flags() {
// wraps the bytes of column col_flags into a new Bitfield object on every call
return new Bitfield(this.entry.getColBytes(col_flags, true));
}
public int wordCount() {
// value of column col_wc, narrowed from long to int
return (int) this.entry.getColLong(col_wc);
}
public int llocal() {
// value of column col_llocal, narrowed from long to int (presumably the number of local links - TODO confirm)
return (int) this.entry.getColLong(col_llocal);
}
public int lother() {
// value of column col_lother, narrowed from long to int (presumably the number of links to other hosts - TODO confirm)
return (int) this.entry.getColLong(col_lother);
}
public int limage() {
// value of column col_limage, narrowed from long to int (presumably the number of image links - TODO confirm)
return (int) this.entry.getColLong(col_limage);
}
public int laudio() {
// value of column col_laudio, narrowed from long to int (presumably the number of audio links - TODO confirm)
return (int) this.entry.getColLong(col_laudio);
}
public int lvideo() {
// value of column col_lvideo, narrowed from long to int (presumably the number of video links - TODO confirm)
return (int) this.entry.getColLong(col_lvideo);
}
public int lapp() {
// value of column col_lapp, narrowed from long to int (presumably the number of application links - TODO confirm)
return (int) this.entry.getColLong(col_lapp);
}
public String snippet() {
// the snippet may be present here if the url was transported in a remote search;
// it is not saved anywhere and can only be requested from this object
return this.snippet;
}
public WordReferenceVars word() {
// returns the word reference attached to this entry; may be null
// (corePropList only exports it when it is not null)
return this.word;
}
public boolean isOlder(final URIMetadata other) {
    // an entry is never considered older than 'nothing'
    if (other == null) {
        return false;
    }
    // first criterion: the modification date
    final Date myMod = moddate();
    final Date otherMod = other.moddate();
    if (myMod.before(otherMod)) {
        return true;
    }
    if (!myMod.equals(otherMod)) {
        return false;
    }
    // same modification date: the load date decides; note that entries with identical
    // load dates are reported as 'older' as well
    final Date myLoad = loaddate();
    final Date otherLoad = other.loaddate();
    return myLoad.before(otherLoad) || myLoad.equals(otherLoad);
}
public String toString(final String snippet) {
    // property form of this entry including the snippet, as needed for remote transport;
    // returns null if the core property list could not be generated
    final StringBuilder props = corePropList();
    if (props == null) {
        return null;
    }
    // reserve room for the encoded snippet before appending it
    // (assumes the encoded form needs at most about twice the raw length - TODO confirm)
    props.ensureCapacity(props.length() + snippet.length() * 2);
    return props
        .insert(0, "{")
        .append(",snippet=")
        .append(crypt.simpleEncode(snippet))
        .append("}")
        .toString();
}
public Request toBalancerEntry(final String initiatorHash) {
    // builds a crawl request from this entry: url, referrer, title and modification date
    // are taken from the entry, all remaining request attributes are left at null / 0
    final byte[] initiator = ASCII.getBytes(initiatorHash);
    final Components meta = metadata();
    final DigestURI target = meta.url();
    final byte[] referrer = referrerHash();
    final String title = meta.dc_title();
    final Date modified = moddate();
    return new Request(initiator, target, referrer, title, modified, null, 0, 0, 0, 0);
}
@Override
public String toString() {
    // core property list wrapped into curly brackets; null if the list could not be generated
    final StringBuilder props = corePropList();
    return (props == null) ? null : props.insert(0, "{").append("}").toString();
}
private class Components {
    // holder for the elements parsed from the comp column of a row entry.
    // The url is kept as raw string + hash first and is converted into a DigestURI
    // only when url() is called for the first time.
    private DigestURI url;    // null until url() has parsed urlRaw successfully
    private String urlRaw;    // raw url string; set to null by the first call of url()
    private byte[] urlHash;   // hash belonging to urlRaw; set to null by the first call of url()
    private final String dc_title, dc_creator, dc_subject, dc_publisher;
    private final String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees

    public Components(
        final String urlRaw,
        final byte[] urlhash,
        final String title,
        final String author,
        final String tags,
        final String publisher,
        final String latlon) {
        this.url = null;
        this.urlRaw = urlRaw;
        this.urlHash = urlhash;
        this.dc_title = title;
        this.dc_creator = author;
        this.dc_subject = tags;
        this.dc_publisher = publisher;
        this.latlon = latlon;
    }

    public boolean matches(final Pattern matcher) {
        // tests the pattern against the lower-cased url; the raw string is preferred as long
        // as it is available, afterwards the normal form of the parsed url is used.
        // Locale.ROOT keeps the lower-casing independent from the default locale of the JVM
        // (a Turkish default locale would otherwise turn 'I' into a dotless i).
        if (this.urlRaw != null) {
            return matcher.matcher(this.urlRaw.toLowerCase(java.util.Locale.ROOT)).matches();
        }
        if (this.url != null) {
            return matcher.matcher(this.url.toNormalform(true, true).toLowerCase(java.util.Locale.ROOT)).matches();
        }
        return false;
    }

    public DigestURI url() {
        // parses the raw url on first use; returns null if the raw url is malformed.
        // The check on urlRaw prevents that a failed parse is repeated with
        // null arguments on every following call.
        if (this.url == null && this.urlRaw != null) {
            try {
                this.url = new DigestURI(this.urlRaw, this.urlHash);
            } catch (final MalformedURLException e) {
                this.url = null;
            }
            // the raw representation is not needed any more
            this.urlRaw = null;
            this.urlHash = null;
        }
        return this.url;
    }

    public String dc_title() { return this.dc_title; }
    public String dc_creator() { return this.dc_creator; }
    public String dc_publisher() { return this.dc_publisher; }
    public String dc_subject() { return this.dc_subject; }

    public float lat() {
        // latitude: the part of latlon in front of the comma
        return coordinate(true);
    }

    public float lon() {
        // longitude: the part of latlon behind the comma
        return coordinate(false);
    }

    private float coordinate(final boolean latitude) {
        // extracts one half of the "<latitude>,<longitude>" tuple;
        // returns 0.0f if the tuple is missing, has no comma or cannot be parsed as a number
        if (this.latlon == null || this.latlon.length() == 0) {
            return 0.0f;
        }
        final int p = this.latlon.indexOf(',');
        if (p < 0) {
            return 0.0f;
        }
        final String part = latitude ? this.latlon.substring(0, p) : this.latlon.substring(p + 1);
        try {
            return Float.parseFloat(part);
        } catch (final NumberFormatException e) {
            // a malformed stored coordinate is handled like a missing one
            return 0.0f;
        }
    }
}
*/
}

View File

@ -157,7 +157,16 @@ public final class Records {
* @throws IOException
*/
private final long filesize() throws IOException {
return raf.length() / recordsize;
long records = 0;
try {
records = raf.length() / recordsize;
} catch (NullPointerException e) {
// This may happen on shutdown while still something is moving on
Log.logException(e);
}
return records;
}
/**

0
source/net/yacy/kelondro/table/Relations.java Executable file → Normal file
View File

0
source/net/yacy/kelondro/util/ISO639.java Executable file → Normal file
View File

View File

@ -61,9 +61,11 @@ import net.yacy.peers.operation.yacySeedUploadFtp;
import net.yacy.peers.operation.yacySeedUploadScp;
import net.yacy.peers.operation.yacySeedUploader;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.server.serverCore;
public class Network {
public class Network
{
// statics
public static final ThreadGroup publishThreadGroup = new ThreadGroup("publishThreadGroup");
@ -127,7 +129,9 @@ public class Network {
}
public final void publishSeedList() {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
if ( log.isFine() ) {
log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
}
/*
if (oldIPStamp.equals((String) seedDB.mySeed.get(yacySeed.IP, "127.0.0.1")))
@ -138,24 +142,24 @@ public class Network {
yacyCore.log.logDebug("***DEBUG publishSeedList: I can reach myself");
*/
if ((this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP())) &&
(this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected()) &&
(canReachMyself()) &&
(System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24) &&
(this.sb.peers.mySeed().isPrincipal())
) {
if (log.isFine()) log.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
if ( (this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP()))
&& (this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected())
&& (canReachMyself())
&& (System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24)
&& (this.sb.peers.mySeed().isPrincipal()) ) {
if ( log.isFine() ) {
log
.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
}
return;
}
// getting the seed upload method that should be used ...
final String seedUploadMethod = this.sb.getConfig("seedUploadMethod", "");
if (
(!seedUploadMethod.equalsIgnoreCase("none")) ||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0)) ||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0))
) {
if ( (!seedUploadMethod.equalsIgnoreCase("none"))
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0))
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0)) ) {
if ( seedUploadMethod.equals("") ) {
if ( this.sb.getConfig("seedFTPPassword", "").length() > 0 ) {
this.sb.setConfig("seedUploadMethod", "Ftp");
@ -170,13 +174,16 @@ public class Network {
if ( seedUploadMethod.equals("") ) {
this.sb.setConfig("seedUploadMethod", "none");
}
if (log.isFine()) log.logFine("yacyCore.publishSeedList: No uploading method configured");
if ( log.isFine() ) {
log.logFine("yacyCore.publishSeedList: No uploading method configured");
}
return;
}
}
public final void peerPing() {
if ((this.sb.isRobinsonMode()) && (this.sb.getConfig("cluster.mode", "").equals("privatepeer"))) {
if ( (this.sb.isRobinsonMode())
&& (this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER)) ) {
// in case this peer is a private peer we omit the peer ping
// all other robinson peer types do a peer ping:
// the privatecluster does the ping to the other cluster members
@ -192,11 +199,17 @@ public class Network {
if ( this.sb.peers.sizeConnected() == 0 ) {
// reload the seed lists
this.sb.loadSeedLists();
log.logInfo("re-initialized seed list. received " + this.sb.peers.sizeConnected() + " new peer(s)");
log.logInfo("re-initialized seed list. received "
+ this.sb.peers.sizeConnected()
+ " new peer(s)");
}
final int newSeeds = publishMySeed(false);
if ( newSeeds > 0 ) {
log.logInfo("received " + newSeeds + " new peer(s), know a total of " + this.sb.peers.sizeConnected() + " different peers");
log.logInfo("received "
+ newSeeds
+ " new peer(s), know a total of "
+ this.sb.peers.sizeConnected()
+ " different peers");
}
}
@ -215,16 +228,22 @@ public class Network {
}
// use our own formatter to prevent concurrency locks with other processes
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second);
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(
GenericFormatter.FORMAT_SHORT_SECOND,
GenericFormatter.time_second);
protected class publishThread extends Thread {
protected class publishThread extends Thread
{
int added;
private final Seed seed;
private final Semaphore sync;
private final List<Thread> syncList;
public publishThread(final ThreadGroup tg, final Seed seed,
final Semaphore sync, final List<Thread> syncList) throws InterruptedException {
public publishThread(
final ThreadGroup tg,
final Seed seed,
final Semaphore sync,
final List<Thread> syncList) throws InterruptedException {
super(tg, "PublishSeed_" + seed.getName());
this.sync = sync;
@ -235,50 +254,106 @@ public class Network {
this.added = 0;
}
@Override
public final void run() {
try {
this.added = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed.getClusterAddress(), this.seed.hash, this.seed.getName());
this.added =
Protocol.hello(
Network.this.sb.peers.mySeed(),
Network.this.sb.peers.peerActions,
this.seed.getClusterAddress(),
this.seed.hash,
this.seed.getName());
if ( this.added < 0 ) {
// no or wrong response, delete that address
final String cause = "peer ping to peer resulted in error response (added < 0)";
log.logInfo("publish: disconnected " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' from " + this.seed.getPublicAddress() + ": " + cause);
log.logInfo("publish: disconnected "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' from "
+ this.seed.getPublicAddress()
+ ": "
+ cause);
Network.this.sb.peers.peerActions.peerDeparture(this.seed, cause);
} else {
// success! we have published our peer to a senior peer
// update latest news from the other peer
log.logInfo("publish: handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress());
log.logInfo("publish: handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress());
// check if seed's lastSeen has been updated
final Seed newSeed = Network.this.sb.peers.getConnected(this.seed.hash);
if ( newSeed != null ) {
if ( !newSeed.isOnline() ) {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " is not online." +
" Removing Peer from connected");
if ( log.isFine() ) {
log.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " is not online."
+ " Removing Peer from connected");
}
Network.this.sb.peers.peerActions.peerDeparture(newSeed, "peer not online");
} else
if (newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000)) {
} else if ( newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000) ) {
// update last seed date
if ( newSeed.getLastSeenUTC() >= this.seed.getLastSeenUTC() ) {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "'");
if ( log.isFine() ) {
log
.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " with old LastSeen: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
.getLastSeenUTC())) + "'");
}
newSeed.setLastSeenUTC();
Network.this.sb.peers.peerActions.peerArrival(newSeed, true);
} else {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "', this is more recent: '" +
my_SHORT_SECOND_FORMATTER.format(new Date(this.seed.getLastSeenUTC())) + "'");
if ( log.isFine() ) {
log
.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " with old LastSeen: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
.getLastSeenUTC()))
+ "', this is more recent: '"
+ my_SHORT_SECOND_FORMATTER.format(new Date(this.seed
.getLastSeenUTC()))
+ "'");
}
this.seed.setLastSeenUTC();
Network.this.sb.peers.peerActions.peerArrival(this.seed, true);
}
}
} else {
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " not in connectedDB");
if ( log.isFine() ) {
log.logFine("publish: recently handshaked "
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
+ " peer '"
+ this.seed.getName()
+ "' at "
+ this.seed.getPublicAddress()
+ " not in connectedDB");
}
}
}
} catch ( final Exception e ) {
log.logSevere("publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(), e);
log.logSevere(
"publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(),
e);
} finally {
this.syncList.add(this);
this.sync.release();
@ -311,9 +386,12 @@ public class Network {
// getting a list of peers to contact
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN).equals(Seed.PEERTYPE_VIRGIN) ) {
if (attempts > PING_INITIAL) { attempts = PING_INITIAL; }
if ( attempts > PING_INITIAL ) {
attempts = PING_INITIAL;
}
final Map<byte[], String> ch = Switchboard.getSwitchboard().clusterhashes;
seeds = PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
seeds =
PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
// add also all peers from cluster if this is a public robinson cluster
if ( ch != null ) {
final Iterator<Map.Entry<byte[], String>> i = ch.entrySet().iterator();
@ -326,7 +404,9 @@ public class Network {
seed = seeds.get(hash);
if ( seed == null ) {
seed = this.sb.peers.get(hash);
if (seed == null) continue;
if ( seed == null ) {
continue;
}
}
seed.setAlternativeAddress(entry.getValue());
seeds.put(hash, seed);
@ -336,15 +416,23 @@ public class Network {
int diff = PING_MIN_DBSIZE - amIAccessibleDB.size();
if ( diff > PING_MIN_RUNNING ) {
diff = Math.min(diff, PING_MAX_RUNNING);
if (attempts > diff) { attempts = diff; }
if ( attempts > diff ) {
attempts = diff;
}
} else {
if (attempts > PING_MIN_RUNNING) { attempts = PING_MIN_RUNNING; }
if ( attempts > PING_MIN_RUNNING ) {
attempts = PING_MIN_RUNNING;
}
}
seeds = PeerSelection.seedsByAge(this.sb.peers, false, attempts); // best for seed list maintenance/cleaning
}
if (seeds == null || seeds.isEmpty()) { return 0; }
if (seeds.size() < attempts) { attempts = seeds.size(); }
if ( seeds == null || seeds.isEmpty() ) {
return 0;
}
if ( seeds.size() < attempts ) {
attempts = seeds.size();
}
// This will try to get Peers that are not currently in amIAccessibleDB
final Iterator<Seed> si = seeds.values().iterator();
@ -380,11 +468,16 @@ public class Network {
i++;
final String address = seed.getClusterAddress();
if (log.isFine()) log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
if ( log.isFine() ) {
log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
}
final String seederror = seed.isProper(false);
if ( (address == null) || (seederror != null) ) {
// we don't like that address, delete it
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror);
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = "
+ address
+ "; seederror = "
+ seederror);
sync.acquire();
} else {
// starting a new publisher thread
@ -438,16 +531,24 @@ public class Network {
}
}
}
if (log.isFine()) log.logFine("DBSize before -> after Cleanup: " + dbSize + " -> " + amIAccessibleDB.size());
if ( log.isFine() ) {
log
.logFine("DBSize before -> after Cleanup: "
+ dbSize
+ " -> "
+ amIAccessibleDB.size());
}
log.logInfo("PeerPing: I am accessible for " + accessible +
" peer(s), not accessible for " + notaccessible + " peer(s).");
}
log.logInfo("PeerPing: I am accessible for "
+ accessible
+ " peer(s), not accessible for "
+ notaccessible
+ " peer(s).");
if ( (accessible + notaccessible) > 0 ) {
final String newPeerType;
// At least one other Peer told us our type
if ((accessible >= PING_MIN_PEERSEEN) ||
(accessible >= notaccessible)) {
if ( (accessible >= PING_MIN_PEERSEEN) || (accessible >= notaccessible) ) {
// We can be reached from a majority of other Peers
if ( this.sb.peers.mySeed().isPrincipal() ) {
newPeerType = Seed.PEERTYPE_PRINCIPAL;
@ -461,7 +562,11 @@ public class Network {
if ( this.sb.peers.mySeed().orVirgin().equals(newPeerType) ) {
log.logInfo("PeerPing: myType is " + this.sb.peers.mySeed().orVirgin());
} else {
log.logInfo("PeerPing: changing myType from '" + this.sb.peers.mySeed().orVirgin() + "' to '" + newPeerType + "'");
log.logInfo("PeerPing: changing myType from '"
+ this.sb.peers.mySeed().orVirgin()
+ "' to '"
+ newPeerType
+ "'");
this.sb.peers.mySeed().put(Seed.PEERTYPE, newPeerType);
}
} else {
@ -474,8 +579,12 @@ public class Network {
this.sb.peers.saveMySeed();
// if we have an address, we do nothing
if (this.sb.peers.mySeed().isProper(true) == null && !force) { return 0; }
if (newSeeds > 0) return newSeeds;
if ( this.sb.peers.mySeed().isProper(true) == null && !force ) {
return 0;
}
if ( newSeeds > 0 ) {
return newSeeds;
}
// still no success: ask own NAT or internet responder
//final boolean DI604use = switchboard.getConfig("DI604use", "false").equals("true");
@ -484,11 +593,16 @@ public class Network {
//if (ip.equals("")) ip = natLib.retrieveIP(DI604use, DI604pw);
// yacyCore.log.logDebug("DEBUG: new IP=" + ip);
if (Seed.isProperIP(ip) == null) this.sb.peers.mySeed().setIP(ip);
if (this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR)) // ???????????????
if ( Seed.isProperIP(ip) == null ) {
this.sb.peers.mySeed().setIP(ip);
}
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR) ) {
this.sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); // to start bootstraping, we need to be recognised as PEERTYPE_SENIOR peer
log.logInfo("publish: no recipient found, our address is " +
((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers.mySeed().getPublicAddress()));
}
log.logInfo("publish: no recipient found, our address is "
+ ((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers
.mySeed()
.getPublicAddress()));
this.sb.peers.saveMySeed();
return 0;
} catch ( final InterruptedException e ) {
@ -499,11 +613,16 @@ public class Network {
Thread.interrupted();
// interrupt all already started publishThreads
log.logInfo("publish: Signaling shutdown to " + Network.publishThreadGroup.activeCount() + " remaining publishing threads ...");
log.logInfo("publish: Signaling shutdown to "
+ Network.publishThreadGroup.activeCount()
+ " remaining publishing threads ...");
Network.publishThreadGroup.interrupt();
// waiting some time for the publishThreads to finish execution
try { Thread.sleep(500); } catch (final InterruptedException ex) {}
try {
Thread.sleep(500);
} catch ( final InterruptedException ex ) {
}
// getting the amount of remaining publishing threads
int threadCount = Network.publishThreadGroup.activeCount();
@ -511,20 +630,33 @@ public class Network {
threadCount = Network.publishThreadGroup.enumerate(threadList);
// we need to use a timeout here because of missing interruptable session threads ...
if (log.isFine()) log.logFine("publish: Waiting for " + Network.publishThreadGroup.activeCount() + " remaining publishing threads to finish shutdown ...");
if ( log.isFine() ) {
log.logFine("publish: Waiting for "
+ Network.publishThreadGroup.activeCount()
+ " remaining publishing threads to finish shutdown ...");
}
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
final Thread currentThread = threadList[currentThreadIdx];
if ( currentThread.isAlive() ) {
if (log.isFine()) log.logFine("publish: Waiting for remaining publishing thread '" + currentThread.getName() + "' to finish shutdown");
try { currentThread.join(500); } catch (final InterruptedException ex) {}
if ( log.isFine() ) {
log.logFine("publish: Waiting for remaining publishing thread '"
+ currentThread.getName()
+ "' to finish shutdown");
}
try {
currentThread.join(500);
} catch ( final InterruptedException ex ) {
}
}
}
log.logInfo("publish: Shutdown off all remaining publishing thread finished.");
} catch ( final Exception ee ) {
log.logWarning("publish: Unexpected error while trying to shutdown all remaining publishing threads.", e);
log.logWarning(
"publish: Unexpected error while trying to shutdown all remaining publishing threads.",
e);
}
return 0;
@ -546,7 +678,9 @@ public class Network {
}
}
if (className == null) { return null; }
if ( className == null ) {
return null;
}
try {
final Class<?> uploaderClass = Class.forName(className);
final Object uploader = uploaderClass.newInstance();
@ -559,17 +693,30 @@ public class Network {
public static void loadSeedUploadMethods() {
yacySeedUploader uploader;
uploader = new yacySeedUploadFile();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
uploader = new yacySeedUploadFtp();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
uploader = new yacySeedUploadScp();
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
Network.seedUploadMethods.put(uploader
.getClass()
.getSimpleName()
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
}
public static boolean changeSeedUploadMethod(final String method) {
if (method == null || method.length() == 0) { return false; }
if ( method == null || method.length() == 0 ) {
return false;
}
if (method.equalsIgnoreCase("none")) { return true; }
if ( method.equalsIgnoreCase("none") ) {
return true;
}
synchronized ( Network.seedUploadMethods ) {
return Network.seedUploadMethods.containsKey(method);
@ -592,27 +739,30 @@ public class Network {
String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
// for backward compatibility ....
if (seedUploadMethod.equalsIgnoreCase("Ftp") ||
(seedUploadMethod.equals("") &&
sb.getConfig("seedFTPPassword", "").length() > 0)) {
if ( seedUploadMethod.equalsIgnoreCase("Ftp")
|| (seedUploadMethod.equals("") && sb.getConfig("seedFTPPassword", "").length() > 0) ) {
seedUploadMethod = "Ftp";
sb.setConfig("seedUploadMethod", seedUploadMethod);
} else if (seedUploadMethod.equalsIgnoreCase("File") ||
(seedUploadMethod.equals("") &&
sb.getConfig("seedFilePath", "").length() > 0)) {
} else if ( seedUploadMethod.equalsIgnoreCase("File")
|| (seedUploadMethod.equals("") && sb.getConfig("seedFilePath", "").length() > 0) ) {
seedUploadMethod = "File";
sb.setConfig("seedUploadMethod", seedUploadMethod);
}
// determine the seed uploader that should be used ...
if (seedUploadMethod.equalsIgnoreCase("none")) { return "no uploader specified"; }
if ( seedUploadMethod.equalsIgnoreCase("none") ) {
return "no uploader specified";
}
final yacySeedUploader uploader = getSeedUploader(seedUploadMethod);
if ( uploader == null ) {
final String errorMsg = "Unable to get the proper uploader-class for seed uploading method '" + seedUploadMethod + "'.";
final String errorMsg =
"Unable to get the proper uploader-class for seed uploading method '"
+ seedUploadMethod
+ "'.";
log.logWarning("SaveSeedList: " + errorMsg);
return errorMsg;
}
@ -621,35 +771,51 @@ public class Network {
DigestURI seedURL;
try {
final String seedURLStr = sb.peers.mySeed().get(Seed.SEEDLISTURL, "");
if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); }
if (!(
seedURLStr.toLowerCase().startsWith("http://") ||
seedURLStr.toLowerCase().startsWith("https://")
)) {
if ( seedURLStr.length() == 0 ) {
throw new MalformedURLException("The seed-file url must not be empty.");
}
if ( !(seedURLStr.toLowerCase().startsWith("http://") || seedURLStr.toLowerCase().startsWith(
"https://")) ) {
throw new MalformedURLException("Unsupported protocol.");
}
seedURL = new DigestURI(seedURLStr);
} catch ( final MalformedURLException e ) {
final String errorMsg = "Malformed seed file URL '" + sb.peers.mySeed().get(Seed.SEEDLISTURL, "") + "'. " + e.getMessage();
final String errorMsg =
"Malformed seed file URL '"
+ sb.peers.mySeed().get(Seed.SEEDLISTURL, "")
+ "'. "
+ e.getMessage();
log.logWarning("SaveSeedList: " + errorMsg);
return errorMsg;
}
// upload the seed-list using the configured uploader class
String prevStatus = sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
if (prevStatus.equals(Seed.PEERTYPE_PRINCIPAL)) { prevStatus = Seed.PEERTYPE_SENIOR; }
if ( prevStatus.equals(Seed.PEERTYPE_PRINCIPAL) ) {
prevStatus = Seed.PEERTYPE_SENIOR;
}
try {
sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); // this information shall also be uploaded
if (log.isFine()) log.logFine("SaveSeedList: Using seed uploading method '" + seedUploadMethod + "' for seed-list uploading." +
"\n\tPrevious peerType is '" + sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR) + "'.");
if ( log.isFine() ) {
log.logFine("SaveSeedList: Using seed uploading method '"
+ seedUploadMethod
+ "' for seed-list uploading."
+ "\n\tPrevious peerType is '"
+ sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR)
+ "'.");
}
logt = sb.peers.uploadSeedList(uploader, sb, sb.peers, seedURL);
if ( logt != null ) {
if ( logt.indexOf("Error", 0) >= 0 ) {
sb.peers.mySeed().put(Seed.PEERTYPE, prevStatus);
final String errorMsg = "SaveSeedList: seed upload failed using " + uploader.getClass().getName() + " (error): " + logt.substring(logt.indexOf("Error",0) + 6);
final String errorMsg =
"SaveSeedList: seed upload failed using "
+ uploader.getClass().getName()
+ " (error): "
+ logt.substring(logt.indexOf("Error", 0) + 6);
log.logSevere(errorMsg);
return errorMsg;
}

View File

@ -77,7 +77,8 @@ import net.yacy.search.Switchboard;
import de.anomic.tools.bitfield;
import de.anomic.tools.crypt;
public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
{
public static String ANON_PREFIX = "_anon";
@ -186,7 +187,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
this.hash = theHash;
this.dna = theDna;
final String flags = this.dna.get(Seed.FLAGS);
if ((flags == null) || (flags.length() != 4)) { this.dna.put(Seed.FLAGS, Seed.FLAGSZERO); }
if ( (flags == null) || (flags.length() != 4) ) {
this.dna.put(Seed.FLAGS, Seed.FLAGSZERO);
}
this.dna.put(Seed.NAME, checkPeerName(get(Seed.NAME, "&empty;")));
this.birthdate = -1; // this means 'not yet parsed', parse that later when it is used
}
@ -243,10 +246,12 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* check the peer name: protect against usage as XSS hack
*
* @param id
* @return a checked name without "<" and ">"
*/
private final static Pattern tp = Pattern.compile("<|>");
public static String checkPeerName(String name) {
name = tp.matcher(name).replaceAll("_");
return name;
@ -254,14 +259,21 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* generate a default peer name
*
* @return
*/
private static String defaultPeerName() {
return ANON_PREFIX + OS.infoKey() + "-" + (System.currentTimeMillis() % 77777777L) + "-" + Network.speedKey;
return ANON_PREFIX
+ OS.infoKey()
+ "-"
+ (System.currentTimeMillis() % 77777777L)
+ "-"
+ Network.speedKey;
}
/**
* Checks for the static fragments of a generated default peer name, such as the string 'dpn'
*
* @see #makeDefaultPeerName()
* @param name the peer name to check for default peer name compliance
* @return whether the given peer name may be a default generated peer name
@ -271,92 +283,151 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
}
/**
* used when doing routing within a cluster; this can assign a ip and a port
* that is used instead the address stored in the seed DNA
* used when doing routing within a cluster; this can assign a ip and a port that is used instead the
* address stored in the seed DNA
*/
public void setAlternativeAddress(final String ipport) {
if (ipport == null) return;
if ( ipport == null ) {
return;
}
final int p = ipport.indexOf(':');
if (p < 0) this.alternativeIP = ipport; else this.alternativeIP = ipport.substring(0, p);
if ( p < 0 ) {
this.alternativeIP = ipport;
} else {
this.alternativeIP = ipport.substring(0, p);
}
}
/**
 * Returns the IP address stored in this seed's DNA.
 *
 * @return the stored IP, or <code>"127.0.0.1"</code> when no IP is stored or the stored value is
 *         empty; never <code>null</code>
 */
public final String getIP() {
    final String stored = get(Seed.IP, "127.0.0.1");
    // fall back to the loopback address for a missing or empty entry
    if ( stored == null || stored.length() == 0 ) {
        return "127.0.0.1";
    }
    return stored;
}
/**
* try to get the peertype<br>
*
* @return the peertype or an empty string if no peertype is set
*/
public final String getPeerType() { return get(Seed.PEERTYPE, ""); }
public final String getPeerType() {
return get(Seed.PEERTYPE, "");
}
/**
* try to get the peertype<br>
*
* @return the peertype or "virgin"
*/
public final String orVirgin() { return get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN); }
public final String orVirgin() {
return get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN);
}
/**
* try to get the peertype<br>
*
* @return the peertype or "junior"
*/
public final String orJunior() { return get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR); }
public final String orJunior() {
return get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
}
/**
* try to get the peertype<br>
*
* @return the peertype or "senior"
*/
public final String orSenior() { return get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); }
public final String orSenior() {
return get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR);
}
/**
* try to get the peertype<br>
*
* @return the peertype or "principal"
*/
public final String orPrincipal() { return get(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); }
public final String orPrincipal() {
return get(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL);
}
/**
* Get a value from the peer's DNA (its set of peer defining values, e.g. IP, name, version, ...)
*
* @param key the key for the value to fetch
* @param dflt the default value
*/
public final String get(final String key, final String dflt) {
final Object o = this.dna.get(key);
if (o == null) { return dflt; }
if ( o == null ) {
return dflt;
}
return (String) o;
}
public final float getFloat(final String key, final float dflt) {
final Object o = this.dna.get(key);
if (o == null) { return dflt; }
if (o instanceof String) try {
if ( o == null ) {
return dflt;
}
if ( o instanceof String ) {
try {
return Float.parseFloat((String) o);
} catch ( final NumberFormatException e ) {
return dflt;
}
} else if ( o instanceof Float ) {
return ((Float) o).floatValue();
} else return dflt;
} else {
return dflt;
}
}
public final long getLong(final String key, final long dflt) {
final Object o = this.dna.get(key);
if (o == null) { return dflt; }
if (o instanceof String) try {
if ( o == null ) {
return dflt;
}
if ( o instanceof String ) {
try {
return Long.parseLong((String) o);
} catch ( final NumberFormatException e ) {
return dflt;
}
} else if ( o instanceof Long ) {
return ((Long) o).longValue();
} else if ( o instanceof Integer ) {
return ((Integer) o).intValue();
} else return dflt;
} else {
return dflt;
}
}
public final void setIP(final String ip) { this.dna.put(Seed.IP, ip); }
public final void setPort(final String port) { this.dna.put(Seed.PORT, port); }
public final void setType(final String type) { this.dna.put(Seed.PEERTYPE, type); }
public final void setJunior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR); }
public final void setSenior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); }
public final void setPrincipal() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); }
public final void setIP(final String ip) {
this.dna.put(Seed.IP, ip);
}
public final void setPort(final String port) {
this.dna.put(Seed.PORT, port);
}
public final void setType(final String type) {
this.dna.put(Seed.PEERTYPE, type);
}
public final void setJunior() {
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
}
public final void setSenior() {
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR);
}
public final void setPrincipal() {
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL);
}
public final void put(final String key, final String value) {
synchronized ( this.dna ) {
@ -385,25 +456,33 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
public final void incSI(final int count) {
String v = this.dna.get(Seed.INDEX_OUT);
if (v == null) { v = Seed.ZERO; }
if ( v == null ) {
v = Seed.ZERO;
}
this.dna.put(Seed.INDEX_OUT, Long.toString(Long.parseLong(v) + count));
}
public final void incRI(final int count) {
String v = this.dna.get(Seed.INDEX_IN);
if (v == null) { v = Seed.ZERO; }
if ( v == null ) {
v = Seed.ZERO;
}
this.dna.put(Seed.INDEX_IN, Long.toString(Long.parseLong(v) + count));
}
public final void incSU(final int count) {
String v = this.dna.get(Seed.URL_OUT);
if (v == null) { v = Seed.ZERO; }
if ( v == null ) {
v = Seed.ZERO;
}
this.dna.put(Seed.URL_OUT, Long.toString(Long.parseLong(v) + count));
}
public final void incRU(final int count) {
String v = this.dna.get(Seed.URL_IN);
if (v == null) { v = Seed.ZERO; }
if ( v == null ) {
v = Seed.ZERO;
}
this.dna.put(Seed.URL_IN, Long.toString(Long.parseLong(v) + count));
}
@ -416,11 +495,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* <code>12 * 6 bit = 72 bit = 24</code> characters octal-hash
* <p>Octal hashes are used for cache-dumps that are DHT-ready</p>
* <p>
* Cause: the natural order of octal hashes are the same as the b64-order of b64Hashes.
* a hexhash cannot be used in such cases, and b64Hashes are not appropriate for file names
* Octal hashes are used for cache-dumps that are DHT-ready
* </p>
* <p>
* Cause: the natural order of octal hashes is the same as the b64-order of b64Hashes. A hex hash cannot
* be used in such cases, and b64Hashes are not appropriate for file names
* </p>
*
* @param b64Hash a base64 hash
* @return the octal representation of the given base64 hash
*/
@ -430,11 +512,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* <code>12 * 6 bit = 72 bit = 18</code> characters hex-hash
*
* @param b64Hash a base64 hash
* @return the hexadecimal representation of the given base64 hash
*/
public static String b64Hash2hexHash(final String b64Hash) {
if (b64Hash.length() > 12) return "";
if ( b64Hash.length() > 12 ) {
return "";
}
// the hash string represents 12 * 6 bit = 72 bits. This is too much for a long integer.
return Digest.encodeHex(Base64Order.enhancedCoder.decode(b64Hash));
}
@ -449,6 +534,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* The returned version follows this pattern: <code>MAJORVERSION . MINORVERSION 0 SVN REVISION</code>
*
* @return the YaCy version of this peer as a float or <code>0</code> if no valid value could be retrieved
* from this yacySeed object
*/
@ -462,6 +548,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* get the SVN version of the peer
*
* @return the SVN revision number of this peer
*/
public final int getRevision() {
@ -474,10 +561,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
*/
public final String getPublicAddress() {
String ip = getIP();
if (ip == null || ip.length() < 8 || ip.length() > 60) ip = "127.0.0.1";
if ( ip == null || ip.length() < 8 || ip.length() > 60 ) {
ip = "127.0.0.1";
}
final String port = this.dna.get(Seed.PORT);
if (port == null || port.length() < 2 || port.length() > 5) return null;
if ( port == null || port.length() < 2 || port.length() > 5 ) {
return null;
}
final StringBuilder sb = new StringBuilder(ip.length() + port.length() + 1);
sb.append(ip);
@ -488,16 +579,21 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* If this seed is part of a cluster, the peer has probably the {@linkplain #alternativeIP} object set to
* a local IP. If this is present and the public IP of this peer is identical to the public IP of the own seed,
* construct an address using this IP; otherwise return the public address
* a local IP. If this is present and the public IP of this peer is identical to the public IP of the own
* seed, construct an address using this IP; otherwise return the public address
*
* @see #getPublicAddress()
* @return the alternative IP:port if present, else the public address
*/
public final String getClusterAddress() {
if (this.alternativeIP == null) return getPublicAddress();
if ( this.alternativeIP == null ) {
return getPublicAddress();
}
final String port = this.dna.get(Seed.PORT);
if ((port == null) || (port.length() < 2)) return null;
if ( (port == null) || (port.length() < 2) ) {
return null;
}
return this.alternativeIP + ":" + port;
}
@ -512,7 +608,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/** @return the portnumber of this seed or <code>-1</code> if not present */
public final int getPort() {
final String port = this.dna.get(Seed.PORT);
if (port == null) return -1;
if ( port == null ) {
return -1;
}
/*if (port.length() < 2) return -1; It is possible to use port 0-9*/
return Integer.parseInt(port);
}
@ -522,8 +620,11 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
// because java thinks it must apply the UTC offset to the current time,
// to create a string that looks like our current time, it adds the local UTC offset to the
// time. To create a corrected UTC Date string, we first subtract the local UTC offset.
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
final String ls = my_SHORT_SECOND_FORMATTER.format(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/));
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
final String ls =
my_SHORT_SECOND_FORMATTER
.format(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/));
//System.out.println("SETTING LAST-SEEN of " + this.getName() + " to " + ls);
this.dna.put(Seed.LASTSEEN, ls);
}
@ -533,7 +634,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
*/
public final long getLastSeenUTC() {
try {
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
final long t = my_SHORT_SECOND_FORMATTER.parse(get(Seed.LASTSEEN, "20040101000000")).getTime();
// getTime creates a UTC time number. But in this case java thinks, that the given
// time string is a local time, which has a local UTC offset applied.
@ -551,6 +653,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
/**
* test if the lastSeen time of the seed has a time-out
*
* @param milliseconds the maximum age of the last-seen value
* @return true, if the time between the last-seen time and now is greater than the given time-out
*/
@ -560,10 +663,13 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
}
public final long getBirthdate() {
if (this.birthdate > 0) return this.birthdate;
if ( this.birthdate > 0 ) {
return this.birthdate;
}
long b;
try {
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
final GenericFormatter my_SHORT_SECOND_FORMATTER =
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
b = my_SHORT_SECOND_FORMATTER.parse(get(Seed.BDATE, "20040101000000")).getTime();
} catch ( final ParseException e ) {
b = System.currentTimeMillis();
@ -587,11 +693,15 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
public boolean matchPeerTags(final HandleSet searchHashes) {
final String peertags = get(PEERTAGS, "");
if (peertags.equals("*")) return true;
if ( peertags.equals("*") ) {
return true;
}
final Set<String> tags = MapTools.string2set(peertags, "|");
final Iterator<String> i = tags.iterator();
while ( i.hasNext() ) {
if (searchHashes.has(Word.word2hash(i.next()))) return true;
if ( searchHashes.has(Word.word2hash(i.next())) ) {
return true;
}
}
return false;
}
@ -635,52 +745,79 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
private void setFlag(final int flag, final boolean value) {
String flags = get(Seed.FLAGS, Seed.FLAGSZERO);
if (flags.length() != 4) { flags = Seed.FLAGSZERO; }
if ( flags.length() != 4 ) {
flags = Seed.FLAGSZERO;
}
final bitfield f = new bitfield(UTF8.getBytes(flags));
f.set(flag, value);
this.dna.put(Seed.FLAGS, UTF8.String(f.getBytes()));
}
public final void setFlagDirectConnect(final boolean value) { setFlag(FLAG_DIRECT_CONNECT, value); }
public final void setFlagAcceptRemoteCrawl(final boolean value) { setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value); }
public final void setFlagAcceptRemoteIndex(final boolean value) { setFlag(FLAG_ACCEPT_REMOTE_INDEX, value); }
public final boolean getFlagDirectConnect() { return getFlag(0); }
public final void setFlagDirectConnect(final boolean value) {
setFlag(FLAG_DIRECT_CONNECT, value);
}
public final void setFlagAcceptRemoteCrawl(final boolean value) {
setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value);
}
public final void setFlagAcceptRemoteIndex(final boolean value) {
setFlag(FLAG_ACCEPT_REMOTE_INDEX, value);
}
public final boolean getFlagDirectConnect() {
return getFlag(0);
}
/**
 * Reads the peer flag at index 1 via getFlag.
 * NOTE(review): the matching setter writes the flag through FLAG_ACCEPT_REMOTE_CRAWL; presumably
 * that constant equals 1 - confirm, and consider using the constant here as well.
 *
 * @return the value of flag 1
 */
public final boolean getFlagAcceptRemoteCrawl() {
// disabled version-dependent overrides, kept for reference:
//if (getVersion() < 0.300) return false;
//if (getVersion() < 0.334) return true;
return getFlag(1);
}
/**
 * Reads the peer flag at index 2 via getFlag.
 * NOTE(review): the matching setter writes the flag through FLAG_ACCEPT_REMOTE_INDEX; presumably
 * that constant equals 2 - confirm, and consider using the constant here as well.
 *
 * @return the value of flag 2
 */
public final boolean getFlagAcceptRemoteIndex() {
// disabled version-dependent override, kept for reference:
//if (getVersion() < 0.335) return false;
return getFlag(2);
}
public final void setUnusedFlags() {
for (int i = 4; i < 24; i++) { setFlag(i, true); }
for ( int i = 4; i < 24; i++ ) {
setFlag(i, true);
}
}
/**
 * Compares the peer type stored in this seed's DNA with the given type.
 *
 * @param type the peer type to compare against
 * @return true if the stored peer type (an empty string when none is stored) equals the given type
 */
public final boolean isType(final String type) {
    final String current = get(Seed.PEERTYPE, "");
    return current.equals(type);
}
/** @return true if the stored peer type equals {@link Seed#PEERTYPE_VIRGIN} */
public final boolean isVirgin() {
    return isType(Seed.PEERTYPE_VIRGIN);
}
/** @return true if the stored peer type equals {@link Seed#PEERTYPE_JUNIOR} */
public final boolean isJunior() {
    return isType(Seed.PEERTYPE_JUNIOR);
}
/** @return true if the stored peer type equals {@link Seed#PEERTYPE_SENIOR} */
public final boolean isSenior() {
    return isType(Seed.PEERTYPE_SENIOR);
}
/** @return true if the stored peer type equals {@link Seed#PEERTYPE_PRINCIPAL} */
public final boolean isPrincipal() {
    return isType(Seed.PEERTYPE_PRINCIPAL);
}
/** @return true if this seed is of type virgin or junior */
public final boolean isPotential() {
    if ( isVirgin() ) {
        return true;
    }
    return isJunior();
}
/** @return true if this seed is of type senior or principal */
public final boolean isActive() {
    if ( isSenior() ) {
        return true;
    }
    return isPrincipal();
}
/** @return true if this seed is of type senior or principal (same test as {@link #isActive()}) */
public final boolean isOnline() {
    final boolean senior = isSenior();
    return senior ? true : isPrincipal();
}
/**
 * Tests whether the given peer type denotes an online peer. This looks only at the argument and
 * does not read this seed's own type.
 *
 * @param type the peer type to test; must not be null
 * @return true if the given type equals the senior or the principal peer type
 */
public final boolean isOnline(final String type) {
    if ( type.equals(Seed.PEERTYPE_SENIOR) ) {
        return true;
    }
    return type.equals(Seed.PEERTYPE_PRINCIPAL);
}
@ -702,13 +839,19 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
String interval = null;
while ( !gaps.isEmpty() ) {
interval = gaps.remove(gaps.lastKey());
if (random.nextBoolean()) break;
if ( random.nextBoolean() ) {
break;
}
}
if ( interval == null ) {
return randomHash();
}
if (interval == null) return randomHash();
// find dht position and size of gap
final long left = FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(0, 12)), null);
final long right = FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(12)), null);
final long left =
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(0, 12)), null);
final long right =
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(12)), null);
final long gap8 = FlatWordPartitionScheme.dhtDistance(left, right) >> 3; // 1/8 of a gap
final long gapx = gap8 + (Math.abs(random.nextLong()) % (6 * gap8));
final long gappos = (Long.MAX_VALUE - left >= gapx) ? left + gapx : (left - Long.MAX_VALUE) + gapx;
@ -725,7 +868,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
combined[1] = randomHash[1];
}
// finally check if the hash is already known
while (seedDB.hasConnected(combined) || seedDB.hasDisconnected(combined) || seedDB.hasPotential(combined)) {
while ( seedDB.hasConnected(combined)
|| seedDB.hasDisconnected(combined)
|| seedDB.hasPotential(combined) ) {
// if we are lucky then this loop will never run
combined = randomHash();
}
@ -734,7 +879,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
private static TreeMap<Long, String> hashGaps(final SeedDB seedDB) {
final TreeMap<Long, String> gaps = new TreeMap<Long, String>();
if (seedDB == null) return gaps;
if ( seedDB == null ) {
return gaps;
}
final Iterator<Seed> i = seedDB.seedsConnected(true, false, null, (float) 0.0);
long l;
@ -746,7 +893,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
first = s0;
continue;
}
l = FlatWordPartitionScheme.dhtDistance(
l =
FlatWordPartitionScheme.dhtDistance(
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s0.hash), null),
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s1.hash), null));
gaps.put(l, s0.hash + s1.hash);
@ -754,7 +902,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
}
// compute also the last gap
if ( (first != null) && (s0 != null) ) {
l = FlatWordPartitionScheme.dhtDistance(
l =
FlatWordPartitionScheme.dhtDistance(
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s0.hash), null),
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(first.hash), null));
gaps.put(l, s0.hash + first.hash);
@ -785,26 +934,44 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
public static byte[] randomHash() {
final String hash =
Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6) +
Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6);
Base64Order.enhancedCoder
.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong())))
.substring(0, 6)
+ Base64Order.enhancedCoder
.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong())))
.substring(0, 6);
return ASCII.getBytes(hash);
}
public static Seed genRemoteSeed(final String seedStr, final String key, final boolean ownSeed, final String patchIP) throws IOException {
public static Seed genRemoteSeed(
final String seedStr,
final String key,
final boolean ownSeed,
final String patchIP) throws IOException {
// this method is used to convert the external representation of a seed into a seed object
// yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key);
// check protocol and syntax of seed
if (seedStr == null) throw new IOException("seedStr == null");
if (seedStr.length() == 0) throw new IOException("seedStr.length() == 0");
if ( seedStr == null ) {
throw new IOException("seedStr == null");
}
if ( seedStr.length() == 0 ) {
throw new IOException("seedStr.length() == 0");
}
final String seed = crypt.simpleDecode(seedStr, key);
if (seed == null) throw new IOException("seed == null");
if (seed.length() == 0) throw new IOException("seed.length() == 0");
if ( seed == null ) {
throw new IOException("seed == null");
}
if ( seed.length() == 0 ) {
throw new IOException("seed.length() == 0");
}
// extract hash
final ConcurrentHashMap<String, String> dna = MapTools.string2map(seed, ",");
final String hash = dna.remove(Seed.HASH);
if (hash == null) throw new IOException("hash == null");
if ( hash == null ) {
throw new IOException("hash == null");
}
final Seed resultSeed = new Seed(hash, dna);
// check semantics of content
@ -816,7 +983,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
resultSeed.setIP(patchIP);
testResult = resultSeed.isProper(ownSeed);
}
if (testResult != null) throw new IOException("seed is not proper (" + testResult + "): " + resultSeed);
if ( testResult != null ) {
throw new IOException("seed is not proper (" + testResult + "): " + resultSeed);
}
// seed ok
return resultSeed;
@ -827,36 +996,52 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
// checks if everything is ok with that seed
// check hash
if (this.hash == null) return "hash is null";
if (this.hash.length() != Word.commonHashLength) return "wrong hash length (" + this.hash.length() + ")";
if ( this.hash == null ) {
return "hash is null";
}
if ( this.hash.length() != Word.commonHashLength ) {
return "wrong hash length (" + this.hash.length() + ")";
}
// name
final String peerName = this.dna.get(Seed.NAME);
if (peerName == null) return "no peer name given";
if ( peerName == null ) {
return "no peer name given";
}
this.dna.put(Seed.NAME, checkPeerName(peerName));
// type
final String peerType = getPeerType();
if ((peerType == null) ||
!(peerType.equals(Seed.PEERTYPE_VIRGIN) || peerType.equals(Seed.PEERTYPE_JUNIOR)
|| peerType.equals(Seed.PEERTYPE_SENIOR) || peerType.equals(Seed.PEERTYPE_PRINCIPAL)))
if ( (peerType == null)
|| !(peerType.equals(Seed.PEERTYPE_VIRGIN)
|| peerType.equals(Seed.PEERTYPE_JUNIOR)
|| peerType.equals(Seed.PEERTYPE_SENIOR) || peerType.equals(Seed.PEERTYPE_PRINCIPAL)) ) {
return "invalid peerType '" + peerType + "'";
}
// check IP
if ( !checkOwnIP ) {
// checking of IP is omitted if we read the own seed file
final String ipCheck = isProperIP(getIP());
if (ipCheck != null) return ipCheck;
if ( ipCheck != null ) {
return ipCheck;
}
}
// seedURL
final String seedURL = this.dna.get(SEEDLISTURL);
if ( seedURL != null && seedURL.length() > 0 ) {
if (!seedURL.startsWith("http://") && !seedURL.startsWith("https://")) return "wrong protocol for seedURL";
if ( !seedURL.startsWith("http://") && !seedURL.startsWith("https://") ) {
return "wrong protocol for seedURL";
}
try {
final URL url = new URL(seedURL);
final String host = url.getHost();
if (host.equals("localhost") || host.startsWith("127.") || (host.startsWith("0:0:0:0:0:0:0:1"))) return "seedURL in localhost rejected";
if ( host.equals("localhost")
|| host.startsWith("127.")
|| (host.startsWith("0:0:0:0:0:0:0:1")) ) {
return "seedURL in localhost rejected";
}
} catch ( final MalformedURLException e ) {
return "seedURL malformed";
}
@ -866,12 +1051,20 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
public static final String isProperIP(final String ipString) {
// returns null if ipString is proper, a string with the cause otherwise
if (ipString == null) return ipString + " -> IP is null";
if (ipString.length() > 0 && ipString.length() < 8) return ipString + " -> IP is too short: ";
if (Switchboard.getSwitchboard().isAllIPMode()) return null;
if ( ipString == null ) {
return ipString + " -> IP is null";
}
if ( ipString.length() > 0 && ipString.length() < 8 ) {
return ipString + " -> IP is too short: ";
}
if ( Switchboard.getSwitchboard().isAllIPMode() ) {
return null;
}
final boolean islocal = Domains.isLocal(ipString, null);
//if (islocal && Switchboard.getSwitchboard().isGlobalMode()) return ipString + " - local IP for global mode rejected";
if (!islocal && Switchboard.getSwitchboard().isIntranetMode()) return ipString + " - global IP for intranet mode rejected";
if ( !islocal && Switchboard.getSwitchboard().isIntranetMode() ) {
return ipString + " - global IP for intranet mode rejected";
}
return null;
}
@ -890,7 +1083,11 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
final String b = crypt.simpleEncode(r, key, 'b');
// the compressed string may be longer than the uncompressed one if there is too much overhead for compression meta-info
// take simply that string that is shorter
if (b.length() < z.length()) return b; else return z;
if ( b.length() < z.length() ) {
return b;
} else {
return z;
}
}
public final void save(final File f) throws IOException {
@ -923,8 +1120,12 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
// TODO Auto-generated method stub
final int o1 = hashCode();
final int o2 = arg0.hashCode();
if (o1 > o2) return 1;
if (o2 > o1) return -1;
if ( o1 > o2 ) {
return 1;
}
if ( o2 > o1 ) {
return -1;
}
return 0;
}

0
source/net/yacy/peers/dht/Dispatcher.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/FlatWordPartitionScheme.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/PartitionScheme.java Executable file → Normal file
View File

0
source/net/yacy/peers/dht/PeerSelection.java Executable file → Normal file
View File

View File

View File

@ -29,13 +29,14 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@ -91,22 +92,22 @@ public class Blacklist {
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null;
private final Map<String, HandleSet> cachedUrlHashs;
private final Map<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final Map<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) {
setRootPath(rootPath);
// prepare the data structure
this.hostpaths_matchable = new HashMap<String, Map<String, List<String>>>();
this.hostpaths_notmatchable = new HashMap<String, Map<String, List<String>>>();
this.cachedUrlHashs = new HashMap<String, HandleSet>();
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) {
this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}

View File

@ -64,6 +64,7 @@ import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
import de.anomic.crawler.retrieval.SMBLoader;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.http.client.Cache;
public final class LoaderDispatcher {
@ -137,7 +138,7 @@ public final class LoaderDispatcher {
public void load(final DigestURI url, final CacheStrategy cacheStratgy, final int maxFileSize, final File targetFile) throws IOException {
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, false).getContent();
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, true).getContent();
if (b == null) throw new IOException("load == null");
final File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@ -190,6 +191,12 @@ public final class LoaderDispatcher {
final String protocol = url.getProtocol();
final String host = url.getHost();
// check if url is in blacklist
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
// check if we have the page in the cache
final CrawlProfile crawlProfile = this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
if (crawlProfile != null && cacheStrategy != CacheStrategy.NOCACHE) {
@ -324,7 +331,7 @@ public final class LoaderDispatcher {
*/
public byte[] loadContent(final Request request, final CacheStrategy cacheStrategy) throws IOException {
// try to download the resource using the loader
final Response entry = load(request, cacheStrategy, false);
final Response entry = load(request, cacheStrategy, true);
if (entry == null) return null; // not found in web
// read resource body (if it is there)
@ -334,7 +341,7 @@ public final class LoaderDispatcher {
public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int timeout, final int maxFileSize) throws IOException, Parser.Failure {
// load resource
final Response response = load(request, cacheStrategy, maxFileSize, false);
final Response response = load(request, cacheStrategy, maxFileSize, true);
final DigestURI url = request.url();
if (response == null) throw new IOException("no Response for url " + url);
@ -347,7 +354,7 @@ public final class LoaderDispatcher {
public ContentScraper parseResource(final DigestURI location, final CacheStrategy cachePolicy) throws IOException {
// load page
final Response r = this.load(request(location, true, false), cachePolicy, false);
final Response r = this.load(request(location, true, false), cachePolicy, true);
final byte[] page = (r == null) ? null : r.getContent();
if (page == null) throw new IOException("no response from url " + location.toString());
@ -366,7 +373,7 @@ public final class LoaderDispatcher {
* @throws IOException
*/
public final Map<MultiProtocolURI, String> loadLinks(final DigestURI url, final CacheStrategy cacheStrategy) throws IOException {
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, false);
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, true);
if (response == null) throw new IOException("response == null");
final ResponseHeader responseHeader = response.getResponseHeader();
if (response.getContent() == null) throw new IOException("resource == null");

File diff suppressed because it is too large Load Diff

View File

@ -278,8 +278,8 @@ public final class SwitchboardConstants {
public static final String CLUSTER_MODE = "cluster.mode";
public static final String CLUSTER_MODE_PUBLIC_CLUSTER = "publiccluster";
public static final String CLUSTER_MODE_PRIVATE_CLUSTER = "privatecluster";
public static final String CLUSTER_MODE_PUBLIC_PEER = "publicpeer";
public static final String CLUSTER_MODE_PRIVATE_PEER = "privatepeer";
public static final String CLUSTER_PEERS_IPPORT = "cluster.peers.ipport";
public static final String DHT_BURST_ROBINSON = "network.unit.dht.burst.robinson";

View File

@ -93,6 +93,7 @@ public final class RWIProcess extends Thread
private final ReferenceOrder order;
private final long startTime;
private boolean addRunning;
private boolean fresh;
// navigation scores
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
@ -136,6 +137,7 @@ public final class RWIProcess extends Thread
this.maxExpectedRemoteReferences = new AtomicInteger(0);
this.expectedRemoteReferences = new AtomicInteger(0);
this.receivedRemoteReferences = new AtomicInteger(0);
this.fresh = true;
}
public void addExpectedRemoteReferences(int x) {
@ -388,10 +390,11 @@ public final class RWIProcess extends Thread
/**
 * Registers one additional feeder on this process.
 * Raises the feeders counter by one and clears the fresh flag, which the
 * constructor initialises to true.
 */
public void oneFeederStarted() {
// count the new feeder first ...
this.feeders.addAndGet(1);
// ... then mark the process as no longer fresh.
// NOTE(review): the order looks deliberate - clearing the flag first could let a
// concurrent feedingIsFinished() see fresh == false while the count is still zero.
// fresh is declared as a plain (non-volatile) boolean, so confirm that cross-thread
// visibility of this write is guaranteed by other means.
this.fresh = false;
}
public boolean feedingIsFinished() {
return this.feeders.get() <= 0;
return !this.fresh && this.feeders.get() <= 0;
}
private boolean testFlags(final WordReference ientry) {

View File

@ -27,6 +27,7 @@ package net.yacy.search.snippet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -48,7 +49,10 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.Blacklist;
import net.yacy.search.Switchboard;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.ZURL.FailCategory;
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
@ -165,6 +169,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
entry = i.next();
url = new DigestURI(entry.getKey());
desc = entry.getValue();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
@ -189,6 +194,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
ientry = i.next();
url = new DigestURI(ientry.url());
final String u = url.toString();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -230,4 +236,27 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
return remaininghashes;
}
/**
 * Checks whether the given URL is listed in the blacklist of the given type.
 * <p>
 * When the URL is listed, the rejection is recorded in the crawler's error URL
 * queue and logged at fine level before the method returns.
 * A URL without a host part is reported as not blacklisted and nothing is recorded.
 *
 * @param url The URL to check
 * @param blacklistType Type of blacklist (see class Blacklist, BLACKLIST_FOO)
 * @return true if the given URL is blacklisted, false otherwise
 */
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) {
    // Guard against URLs that carry no host: calling toLowerCase() on a null
    // host would throw a NullPointerException and abort the whole snippet computation.
    final String host = url.getHost();
    if (host == null) {
        return false;
    }

    // Not listed: nothing to record
    if (!Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) {
        return false;
    }

    // Listed: push the URL to the error queue (fetch the switchboard only once) and log the rejection
    final Switchboard sb = Switchboard.getSwitchboard();
    sb.crawlQueues.errorURL.push(new Request(url, null), sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
    Log.logFine("snippet fetch", "MEDIA-SNIPPET Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
    return true;
}
}