mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git
Conflicts: source/net/yacy/search/Switchboard.java
This commit is contained in:
commit
2ee8cbeb2c
23
bin/checkalive.sh
Executable file
23
bin/checkalive.sh
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/bin/bash

# Watchdog for a locally running YaCy peer.
# Probes the peer's status page; if no HTTP 200 response is seen, the peer is
# stopped (killed if necessary), its run marker is removed and it is started again.
#
# add in /etc/crontab
# 0 * * * * yacy cd /home/yacy/production/bin && ./checkalive.sh

# Bound the probe: a peer that accepts the connection but never answers is
# exactly the situation this script has to detect, so do not let wget wait
# with its (very long) default timeouts or retry on its own.
RESULT=$(wget --spider --tries=1 --timeout=30 http://localhost:8090/Status.html 2>&1)
FLAG=0

# wget's progress output (stderr, captured above) contains the status code as
# a separate word; look for a bare "200" token.
for x in $RESULT; do
    if [ "$x" = '200' ]; then
        FLAG=1
    fi
done

if [ "$FLAG" -eq 0 ]; then
    # The start/stop/kill scripts and DATA/ are addressed relative to the
    # parent directory; never run them from the wrong place.
    cd .. || exit 1
    # give the regular shutdown 30 seconds, then force it
    timeout 30s ./stopYACY.sh
    ./killYACY.sh
    # the marker may already be gone after a clean stop; -f keeps cron quiet
    rm -f DATA/yacy.running
    ./startYACY.sh
fi

exit
|
|
@ -41,9 +41,13 @@ import de.anomic.server.serverObjects;
|
|||
import de.anomic.server.serverSwitch;
|
||||
|
||||
/** draw a banner with information about the peer */
|
||||
public class Banner {
|
||||
public class Banner
|
||||
{
|
||||
|
||||
public static RasterPlotter respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws IOException {
|
||||
public static RasterPlotter respond(
|
||||
final RequestHeader header,
|
||||
final serverObjects post,
|
||||
final serverSwitch env) throws IOException {
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
final String IMAGE = "htroot/env/grafics/yacy.png";
|
||||
int width = 468;
|
||||
|
@ -106,10 +110,45 @@ public class Banner {
|
|||
if ( !NetworkGraph.logoIsLoaded() ) {
|
||||
ImageIO.setUseCache(false); // do not write a cache to disc; keep in RAM
|
||||
final BufferedImage logo = ImageIO.read(new File(IMAGE));
|
||||
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm, logo);
|
||||
return NetworkGraph.getBannerPicture(
|
||||
1000,
|
||||
width,
|
||||
height,
|
||||
bgcolor,
|
||||
textcolor,
|
||||
bordercolor,
|
||||
name,
|
||||
links,
|
||||
words,
|
||||
type,
|
||||
myppm,
|
||||
network,
|
||||
peers,
|
||||
nlinks,
|
||||
nwords,
|
||||
nqph,
|
||||
nppm,
|
||||
logo);
|
||||
}
|
||||
|
||||
return NetworkGraph.getBannerPicture(1000, width, height, bgcolor, textcolor, bordercolor, name, links, words, type, myppm, network, peers, nlinks, nwords, nqph, nppm);
|
||||
return NetworkGraph.getBannerPicture(
|
||||
1000,
|
||||
width,
|
||||
height,
|
||||
bgcolor,
|
||||
textcolor,
|
||||
bordercolor,
|
||||
name,
|
||||
links,
|
||||
words,
|
||||
type,
|
||||
myppm,
|
||||
network,
|
||||
peers,
|
||||
nlinks,
|
||||
nwords,
|
||||
nqph,
|
||||
nppm);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
0
htroot/Collage.html
Executable file → Normal file
0
htroot/Collage.html
Executable file → Normal file
0
htroot/Collage.java
Executable file → Normal file
0
htroot/Collage.java
Executable file → Normal file
|
@ -163,23 +163,9 @@
|
|||
</dt>
|
||||
<dd>Your search engine will not contact any other peer, and will reject every request.
|
||||
</dd>
|
||||
<!-- not yet implemented
|
||||
<dt>
|
||||
<label for="cluster.modePrivatecluster">Private Cluster</label>
|
||||
<input type="radio" value="privatecluster" id="cluster.modePrivatecluster" name="cluster.mode"
|
||||
#(privateclusterChecked)#::checked="checked" #(/privateclusterChecked)#/>
|
||||
</dt>
|
||||
<dd>
|
||||
Your peer is part of a private cluster without public visibility.<br />
|
||||
Index data is not distributed, but remote crawl requests are distributed and accepted from your cluster.<br />
|
||||
Search requests are spread over all peers of the cluster, and answered from all peers of the cluster.<br />
|
||||
List of ip:port - addresses of the cluster: (comma-separated)<br />
|
||||
<input type="text" name="cluster.peers.ipport" value="#[cluster.peers.ipport]#" size="80" maxlength="800" />
|
||||
</dd>
|
||||
-->
|
||||
<dt>
|
||||
<label for="cluster.modePublicpeer">Public Peer</label>
|
||||
<input type="radio" value="publicpeer" id="cluster.modePublicpeer" name="cluster.mode"
|
||||
<label for="publicpeer">Public Peer</label>
|
||||
<input type="radio" value="publicpeer" id="publicpeer" name="cluster.mode"
|
||||
#(publicpeerChecked)#::checked="checked" #(/publicpeerChecked)#/>
|
||||
</dt>
|
||||
<dd>
|
||||
|
@ -187,8 +173,8 @@
|
|||
Your peer does not accept any outside index data, but responds on all remote search requests.
|
||||
</dd>
|
||||
<dt>
|
||||
<label for="cluster.modePubliccluster">Public Cluster</label>
|
||||
<input type="radio" value="publiccluster" id="cluster.modePubliccluster" name="cluster.mode"
|
||||
<label for="publiccluster">Public Cluster</label>
|
||||
<input type="radio" value="publiccluster" id="publiccluster" name="cluster.mode"
|
||||
#(publicclusterChecked)#::checked="checked" #(/publicclusterChecked)#/>
|
||||
</dt>
|
||||
<dd>
|
||||
|
|
|
@ -40,25 +40,35 @@ import de.anomic.data.WorkTables;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
|
||||
public class ConfigNetwork_p {
|
||||
public class ConfigNetwork_p
|
||||
{
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) throws FileNotFoundException, IOException {
|
||||
public static serverObjects respond(
|
||||
final RequestHeader header,
|
||||
final serverObjects post,
|
||||
final serverSwitch env) throws FileNotFoundException, IOException {
|
||||
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
final serverObjects prop = new serverObjects();
|
||||
int commit = 0;
|
||||
|
||||
// load all options for network definitions
|
||||
final File networkBootstrapLocationsFile = new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
|
||||
final File networkBootstrapLocationsFile =
|
||||
new File(new File(sb.getAppPath(), "defaults"), "yacy.networks");
|
||||
final Set<String> networkBootstrapLocations = FileUtils.loadList(networkBootstrapLocationsFile);
|
||||
|
||||
if ( post != null ) {
|
||||
|
||||
// store this call as api call
|
||||
sb.tables.recordAPICall(post, "ConfigNetwork_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "network settings");
|
||||
sb.tables.recordAPICall(
|
||||
post,
|
||||
"ConfigNetwork_p.html",
|
||||
WorkTables.TABLE_API_TYPE_CONFIGURATION,
|
||||
"network settings");
|
||||
|
||||
if ( post.containsKey("changeNetwork") ) {
|
||||
final String networkDefinition = post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
|
||||
final String networkDefinition =
|
||||
post.get("networkDefinition", "defaults/yacy.network.freeworld.unit");
|
||||
if ( networkDefinition.equals(sb.getConfig("network.unit.definition", "")) ) {
|
||||
// no change
|
||||
commit = 3;
|
||||
|
@ -74,6 +84,12 @@ public class ConfigNetwork_p {
|
|||
// DHT control
|
||||
boolean indexDistribute = "on".equals(post.get("indexDistribute", ""));
|
||||
boolean indexReceive = "on".equals(post.get("indexReceive", ""));
|
||||
if ( !indexReceive ) {
|
||||
// remove heuristics
|
||||
sb.setConfig("heuristic.site", false);
|
||||
sb.setConfig("heuristic.scroogle", false);
|
||||
sb.setConfig("heuristic.blekko", false);
|
||||
}
|
||||
final boolean robinsonmode = "robinson".equals(post.get("network", ""));
|
||||
if ( robinsonmode ) {
|
||||
indexDistribute = false;
|
||||
|
@ -127,12 +143,17 @@ public class ConfigNetwork_p {
|
|||
}
|
||||
|
||||
if ( post.containsKey("peertags") ) {
|
||||
sb.peers.mySeed().setPeerTags(MapTools.string2set(normalizedList(post.get("peertags")), ","));
|
||||
sb.peers.mySeed().setPeerTags(
|
||||
MapTools.string2set(normalizedList(post.get("peertags")), ","));
|
||||
}
|
||||
|
||||
sb.setConfig("cluster.mode", post.get("cluster.mode", "publicpeer"));
|
||||
sb.setConfig("cluster.mode", post.get(
|
||||
SwitchboardConstants.CLUSTER_MODE,
|
||||
SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER));
|
||||
sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", "")));
|
||||
sb.setConfig("cluster.peers.yacydomain", checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
|
||||
sb.setConfig(
|
||||
"cluster.peers.yacydomain",
|
||||
checkYaCyDomainList(post.get("cluster.peers.yacydomain", "")));
|
||||
|
||||
// update the cluster hash set
|
||||
sb.clusterhashes = sb.peers.clusterHashes(sb.getConfig("cluster.peers.yacydomain", ""));
|
||||
|
@ -144,20 +165,34 @@ public class ConfigNetwork_p {
|
|||
|
||||
// write remote crawl request settings
|
||||
prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? "1" : "0");
|
||||
final long RTCbusySleep = Math.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
|
||||
final long RTCbusySleep =
|
||||
Math
|
||||
.max(1, env.getConfigInt(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, 100));
|
||||
final int RTCppm = (int) (60000L / RTCbusySleep);
|
||||
prop.put("acceptCrawlLimit", RTCppm);
|
||||
|
||||
final boolean indexDistribute = sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW, true);
|
||||
final boolean indexReceive = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
|
||||
prop.put("indexDistributeChecked", (indexDistribute) ? "1" : "0");
|
||||
prop.put("indexDistributeWhileCrawling.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
|
||||
prop.put("indexDistributeWhileCrawling.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
|
||||
prop.put("indexDistributeWhileIndexing.on", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
|
||||
prop.put("indexDistributeWhileIndexing.off", (sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
|
||||
prop.put(
|
||||
"indexDistributeWhileCrawling.on",
|
||||
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "1" : "0");
|
||||
prop.put(
|
||||
"indexDistributeWhileCrawling.off",
|
||||
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_CRAWLING, true)) ? "0" : "1");
|
||||
prop.put(
|
||||
"indexDistributeWhileIndexing.on",
|
||||
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "1" : "0");
|
||||
prop.put(
|
||||
"indexDistributeWhileIndexing.off",
|
||||
(sb.getConfigBool(SwitchboardConstants.INDEX_DIST_ALLOW_WHILE_INDEXING, true)) ? "0" : "1");
|
||||
prop.put("indexReceiveChecked", (indexReceive) ? "1" : "0");
|
||||
prop.put("indexReceiveBlockBlacklistChecked.on", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
|
||||
prop.put("indexReceiveBlockBlacklistChecked.off", (sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
|
||||
prop.put(
|
||||
"indexReceiveBlockBlacklistChecked.on",
|
||||
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "1" : "0");
|
||||
prop.put(
|
||||
"indexReceiveBlockBlacklistChecked.off",
|
||||
(sb.getConfigBool("indexReceiveBlockBlacklist", true)) ? "0" : "1");
|
||||
prop.putHTML("peertags", MapTools.set2string(sb.peers.mySeed().getPeerTags(), ",", false));
|
||||
|
||||
// set seed information directly
|
||||
|
@ -180,10 +215,15 @@ public class ConfigNetwork_p {
|
|||
prop.put("cluster.peers.yacydomain.hashes", hashes.toString());
|
||||
|
||||
// set p2p mode flags
|
||||
prop.put("privatepeerChecked", ("privatepeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
|
||||
prop.put("privateclusterChecked", ("privatecluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
|
||||
prop.put("publicclusterChecked", ("publiccluster".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
|
||||
prop.put("publicpeerChecked", ("publicpeer".equals(sb.getConfig("cluster.mode", ""))) ? "1" : "0");
|
||||
prop.put(
|
||||
"privatepeerChecked",
|
||||
(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
|
||||
prop.put(
|
||||
"publicclusterChecked",
|
||||
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
|
||||
prop.put(
|
||||
"publicpeerChecked",
|
||||
(SwitchboardConstants.CLUSTER_MODE_PUBLIC_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) ? "1" : "0");
|
||||
|
||||
// set network configuration
|
||||
prop.putHTML("network.unit.definition", sb.getConfig("network.unit.definition", ""));
|
||||
|
@ -218,8 +258,10 @@ public class ConfigNetwork_p {
|
|||
final String[] array = normalizedList(input).split(",");
|
||||
final StringBuilder output = new StringBuilder();
|
||||
for ( final String element : array ) {
|
||||
if ((element.endsWith(".yacyh")) || (element.endsWith(".yacy")) ||
|
||||
(element.indexOf(".yacyh=",0) > 0) || (element.indexOf(".yacy=",0) > 0)) {
|
||||
if ( (element.endsWith(".yacyh"))
|
||||
|| (element.endsWith(".yacy"))
|
||||
|| (element.indexOf(".yacyh=", 0) > 0)
|
||||
|| (element.indexOf(".yacy=", 0) > 0) ) {
|
||||
output.append(",").append(element);
|
||||
}
|
||||
}
|
||||
|
|
0
htroot/IndexCleaner_p.html
Executable file → Normal file
0
htroot/IndexCleaner_p.html
Executable file → Normal file
0
htroot/IndexCleaner_p.java
Executable file → Normal file
0
htroot/IndexCleaner_p.java
Executable file → Normal file
|
@ -70,6 +70,12 @@
|
|||
</dd>
|
||||
#(/urgentStatusVirgin)#
|
||||
|
||||
#(hintStatusPrivate)#::
|
||||
<dt class="hintIcon"><img src="env/grafics/bad.png" width="32" height="32" alt="idea"/></dt>
|
||||
<dd class="hint">Your network configuration is in private mode. Your peer seed will not be published.
|
||||
</dd>
|
||||
#(/hintStatusPrivate)#
|
||||
|
||||
<!-- warnings -->
|
||||
|
||||
#(warningGoOnline)#::
|
||||
|
|
|
@ -46,25 +46,31 @@ import de.anomic.server.serverCore;
|
|||
import de.anomic.server.serverObjects;
|
||||
import de.anomic.server.serverSwitch;
|
||||
|
||||
public class Status {
|
||||
public class Status
|
||||
{
|
||||
|
||||
private static final String SEEDSERVER = "seedServer";
|
||||
private static final String PEERSTATUS = "peerStatus";
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
public static serverObjects respond(
|
||||
final RequestHeader header,
|
||||
final serverObjects post,
|
||||
final serverSwitch env) {
|
||||
// return variable that accumulates replacements
|
||||
final serverObjects prop = new serverObjects();
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
|
||||
// check if the basic configuration was accessed before and forward
|
||||
prop.put("forwardToConfigBasic", 0);
|
||||
if ((post == null || !post.containsKey("noforward")) &&
|
||||
sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html",0) < 0 &&
|
||||
Seed.isDefaultPeerName(sb.peers.mySeed().getName())) {
|
||||
if ( (post == null || !post.containsKey("noforward"))
|
||||
&& sb.getConfig("server.servlets.submitted", "").indexOf("ConfigBasic.html", 0) < 0
|
||||
&& Seed.isDefaultPeerName(sb.peers.mySeed().getName()) ) {
|
||||
// forward to ConfigBasic
|
||||
prop.put("forwardToConfigBasic", 1);
|
||||
}
|
||||
if (post != null) post.remove("noforward");
|
||||
if ( post != null ) {
|
||||
post.remove("noforward");
|
||||
}
|
||||
|
||||
if ( post != null && post.size() > 0 ) {
|
||||
if ( sb.adminAuthenticated(header) < 2 ) {
|
||||
|
@ -123,7 +129,8 @@ public class Status {
|
|||
}
|
||||
|
||||
// password protection
|
||||
if ((sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0) && (!sb.getConfigBool("adminAccountForLocalhost", false))) {
|
||||
if ( (sb.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").length() == 0)
|
||||
&& (!sb.getConfigBool("adminAccountForLocalhost", false)) ) {
|
||||
prop.put("protection", "0"); // not protected
|
||||
prop.put("urgentSetPassword", "1");
|
||||
} else {
|
||||
|
@ -142,7 +149,8 @@ public class Status {
|
|||
prop.put("warningDiskSpaceLow_minSpace", minFree);
|
||||
}
|
||||
if ( !sb.observer.getMemoryAvailable() ) {
|
||||
final String minFree = Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
|
||||
final String minFree =
|
||||
Formatter.bytesToString(sb.observer.getMinFreeMemory() * 1024L * 1024L);
|
||||
prop.put("warningMemoryLow", "1");
|
||||
prop.put("warningMemoryLow_minSpace", minFree);
|
||||
}
|
||||
|
@ -151,7 +159,8 @@ public class Status {
|
|||
|
||||
// version information
|
||||
//final String versionstring = yacyVersion.combined2prettyVersion(sb.getConfig("version","0.1"));
|
||||
final String versionstring = yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
|
||||
final String versionstring =
|
||||
yacyBuildProperties.getVersion() + "/" + yacyBuildProperties.getSVNRevision();
|
||||
prop.put("versionpp", versionstring);
|
||||
|
||||
// place some more hints
|
||||
|
@ -200,7 +209,9 @@ public class Status {
|
|||
prop.put("peerStatistics", "1");
|
||||
prop.put("peerStatistics_uptime", PeerActions.formatInterval(uptime));
|
||||
prop.putNum("peerStatistics_pagesperminute", sb.peers.mySeed().getPPM());
|
||||
prop.putNum("peerStatistics_queriesperhour", Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
|
||||
prop.putNum(
|
||||
"peerStatistics_queriesperhour",
|
||||
Math.round(6000d * sb.peers.mySeed().getQPM()) / 100d);
|
||||
prop.putNum("peerStatistics_links", sb.peers.mySeed().getLinkCount());
|
||||
prop.put("peerStatistics_words", Formatter.number(sb.peers.mySeed().getWordCount()));
|
||||
prop.putNum("peerStatistics_disconnects", sb.peers.peerActions.disconnects);
|
||||
|
@ -215,11 +226,19 @@ public class Status {
|
|||
prop.putXML("peerAddress_peername", sb.peers.mySeed().getName().toLowerCase());
|
||||
}
|
||||
}
|
||||
final String peerStatus = ((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN));
|
||||
if (Seed.PEERTYPE_VIRGIN.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
|
||||
final String peerStatus =
|
||||
((sb.peers.mySeed() == null) ? Seed.PEERTYPE_VIRGIN : sb.peers.mySeed().get(
|
||||
Seed.PEERTYPE,
|
||||
Seed.PEERTYPE_VIRGIN));
|
||||
|
||||
if ( Seed.PEERTYPE_VIRGIN.equals(peerStatus)
|
||||
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
|
||||
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
|
||||
prop.put(PEERSTATUS, "0");
|
||||
prop.put("urgentStatusVirgin", "1");
|
||||
} else if (Seed.PEERTYPE_JUNIOR.equals(peerStatus) && "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))) {
|
||||
} else if ( Seed.PEERTYPE_JUNIOR.equals(peerStatus)
|
||||
&& "freeworld".equals(sb.getConfig(SwitchboardConstants.NETWORK_NAME, ""))
|
||||
&& !SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER.equals(sb.getConfig(SwitchboardConstants.CLUSTER_MODE, ""))) {
|
||||
prop.put(PEERSTATUS, "1");
|
||||
prop.put("warningStatusJunior", "1");
|
||||
} else if ( Seed.PEERTYPE_SENIOR.equals(peerStatus) ) {
|
||||
|
@ -234,9 +253,10 @@ public class Status {
|
|||
prop.put("hash", thisHash);
|
||||
|
||||
final String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
|
||||
if (!"none".equalsIgnoreCase(seedUploadMethod) ||
|
||||
("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 ||
|
||||
sb.getConfig("seedFilePath", "").length() > 0))) {
|
||||
if ( !"none".equalsIgnoreCase(seedUploadMethod)
|
||||
|| ("".equals(seedUploadMethod) && (sb.getConfig("seedFTPPassword", "").length() > 0 || sb
|
||||
.getConfig("seedFilePath", "")
|
||||
.length() > 0)) ) {
|
||||
if ( "".equals(seedUploadMethod) ) {
|
||||
if ( sb.getConfig("seedFTPPassword", "").length() > 0 ) {
|
||||
sb.setConfig("seedUploadMethod", "Ftp");
|
||||
|
@ -256,7 +276,8 @@ public class Status {
|
|||
prop.put(SEEDSERVER, "2"); // enabled
|
||||
prop.putHTML("seedServer_seedFile", sb.getConfig("seedFilePath", ""));
|
||||
}
|
||||
prop.put("seedServer_lastUpload",
|
||||
prop.put(
|
||||
"seedServer_lastUpload",
|
||||
PeerActions.formatInterval(System.currentTimeMillis() - sb.peers.lastSeedUpload_timeStamp));
|
||||
} else {
|
||||
prop.put(SEEDSERVER, "0"); // disabled
|
||||
|
@ -307,11 +328,19 @@ public class Status {
|
|||
prop.putNum("loaderQueueMax", loaderMaxCount);
|
||||
prop.put("loaderQueuePercent", (loaderPercent > 100) ? 100 : loaderPercent);
|
||||
|
||||
prop.putNum("localCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL).getJobCount());
|
||||
prop.put("localCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL) ? "1" : "0");
|
||||
prop.putNum("localCrawlQueueSize", sb
|
||||
.getThread(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
|
||||
.getJobCount());
|
||||
prop.put("localCrawlPaused", sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)
|
||||
? "1"
|
||||
: "0");
|
||||
|
||||
prop.putNum("remoteTriggeredCrawlQueueSize", sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
|
||||
prop.put("remoteTriggeredCrawlPaused",sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
|
||||
prop.putNum(
|
||||
"remoteTriggeredCrawlQueueSize",
|
||||
sb.getThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL).getJobCount());
|
||||
prop.put(
|
||||
"remoteTriggeredCrawlPaused",
|
||||
sb.crawlJobIsPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL) ? "1" : "0");
|
||||
|
||||
prop.putNum("stackCrawlQueueSize", sb.crawlStacker.size());
|
||||
|
||||
|
|
0
htroot/api/bookmarks/posts/add_p.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/add_p.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/all.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/all.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/delete_p.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/delete_p.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/get.java
Executable file → Normal file
0
htroot/api/bookmarks/posts/get.java
Executable file → Normal file
0
htroot/api/bookmarks/tags/editTag_p.java
Executable file → Normal file
0
htroot/api/bookmarks/tags/editTag_p.java
Executable file → Normal file
0
htroot/api/bookmarks/tags/getTag.java
Executable file → Normal file
0
htroot/api/bookmarks/tags/getTag.java
Executable file → Normal file
0
htroot/api/bookmarks/xbel/xbel.java
Executable file → Normal file
0
htroot/api/bookmarks/xbel/xbel.java
Executable file → Normal file
0
htroot/api/feed.java
Executable file → Normal file
0
htroot/api/feed.java
Executable file → Normal file
0
htroot/api/getpageinfo_p.java
Executable file → Normal file
0
htroot/api/getpageinfo_p.java
Executable file → Normal file
0
htroot/api/ynetSearch.java
Executable file → Normal file
0
htroot/api/ynetSearch.java
Executable file → Normal file
0
htroot/compare_yacy.html
Executable file → Normal file
0
htroot/compare_yacy.html
Executable file → Normal file
0
htroot/compare_yacy.java
Executable file → Normal file
0
htroot/compare_yacy.java
Executable file → Normal file
0
htroot/processing/domaingraph/applet/domaingraph.java
Executable file → Normal file
0
htroot/processing/domaingraph/applet/domaingraph.java
Executable file → Normal file
0
htroot/processing/domaingraph/applet/index.html
Executable file → Normal file
0
htroot/processing/domaingraph/applet/index.html
Executable file → Normal file
0
htroot/rssTerminal.html
Executable file → Normal file
0
htroot/rssTerminal.html
Executable file → Normal file
0
htroot/terminal_p.html
Executable file → Normal file
0
htroot/terminal_p.html
Executable file → Normal file
|
@ -81,13 +81,18 @@ import de.anomic.server.serverObjects;
|
|||
import de.anomic.server.serverSwitch;
|
||||
import de.anomic.server.servletProperties;
|
||||
|
||||
public class yacysearch {
|
||||
public class yacysearch
|
||||
{
|
||||
|
||||
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||
public static serverObjects respond(
|
||||
final RequestHeader header,
|
||||
final serverObjects post,
|
||||
final serverSwitch env) {
|
||||
final Switchboard sb = (Switchboard) env;
|
||||
sb.localSearchLastAccess = System.currentTimeMillis();
|
||||
|
||||
final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
|
||||
final boolean searchAllowed =
|
||||
sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
|
||||
|
||||
boolean authenticated = sb.adminAuthenticated(header) >= 2;
|
||||
if ( !authenticated ) {
|
||||
|
@ -96,15 +101,17 @@ public class yacysearch {
|
|||
}
|
||||
final boolean localhostAccess = sb.accessFromLocalhost(header);
|
||||
final String promoteSearchPageGreeting =
|
||||
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ?
|
||||
env.getConfig("network.unit.description", "") :
|
||||
env.getConfig(SwitchboardConstants.GREETING, "");
|
||||
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ? env.getConfig(
|
||||
"network.unit.description",
|
||||
"") : env.getConfig(SwitchboardConstants.GREETING, "");
|
||||
final String client = header.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
|
||||
|
||||
// get query
|
||||
final String originalquerystring = (post == null) ? "" : post.get("query", post.get("search", "")).trim();
|
||||
final String originalquerystring =
|
||||
(post == null) ? "" : post.get("query", post.get("search", "")).trim();
|
||||
String querystring = originalquerystring.replace('+', ' ').replace('*', ' ').trim();
|
||||
CacheStrategy snippetFetchStrategy = (post == null) ? null : CacheStrategy.parse(post.get("verify", "cacheonly"));
|
||||
CacheStrategy snippetFetchStrategy =
|
||||
(post == null) ? null : CacheStrategy.parse(post.get("verify", "cacheonly"));
|
||||
final servletProperties prop = new servletProperties();
|
||||
prop.put("topmenu", sb.getConfigBool("publicTopmenu", true) ? 1 : 0);
|
||||
|
||||
|
@ -124,8 +131,12 @@ public class yacysearch {
|
|||
final boolean rss = EXT.equals("rss");
|
||||
final boolean json = EXT.equals("json");
|
||||
prop.put("promoteSearchPageGreeting", promoteSearchPageGreeting);
|
||||
prop.put("promoteSearchPageGreeting.homepage", sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
|
||||
prop.put("promoteSearchPageGreeting.smallImage", sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
|
||||
prop.put(
|
||||
"promoteSearchPageGreeting.homepage",
|
||||
sb.getConfig(SwitchboardConstants.GREETING_HOMEPAGE, ""));
|
||||
prop.put(
|
||||
"promoteSearchPageGreeting.smallImage",
|
||||
sb.getConfig(SwitchboardConstants.GREETING_SMALL_IMAGE, ""));
|
||||
if ( post == null || indexSegment == null || env == null || !searchAllowed ) {
|
||||
// we create empty entries for template strings
|
||||
prop.put("searchagain", "0");
|
||||
|
@ -140,8 +151,12 @@ public class yacysearch {
|
|||
prop.put("constraint", "");
|
||||
prop.put("cat", "href");
|
||||
prop.put("depth", "0");
|
||||
prop.put("search.verify", (post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh"));
|
||||
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
|
||||
prop.put(
|
||||
"search.verify",
|
||||
(post == null) ? sb.getConfig("search.verify", "iffresh") : post.get("verify", "iffresh"));
|
||||
prop.put(
|
||||
"search.navigation",
|
||||
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
|
||||
prop.put("contentdom", "text");
|
||||
prop.put("contentdomCheckText", "1");
|
||||
prop.put("contentdomCheckAudio", "0");
|
||||
|
@ -180,7 +195,14 @@ public class yacysearch {
|
|||
|
||||
// collect search attributes
|
||||
|
||||
int maximumRecords = Math.min((authenticated) ? (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 100 : 5000) : (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ? 20 : 1000), post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
|
||||
int maximumRecords =
|
||||
Math.min(
|
||||
(authenticated)
|
||||
? (snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline()
|
||||
? 100
|
||||
: 5000) : (snippetFetchStrategy != null
|
||||
&& snippetFetchStrategy.isAllowedToFetchOnline() ? 20 : 1000),
|
||||
post.getInt("maximumRecords", post.getInt("count", 10))); // SRU syntax with old property as alternative
|
||||
int startRecord = post.getInt("startRecord", post.getInt("offset", 0));
|
||||
|
||||
boolean global = post.get("resource", "local").equals("global") && sb.peers.sizeConnected() > 0;
|
||||
|
@ -198,15 +220,21 @@ public class yacysearch {
|
|||
prefermask = ".*" + prefermask + ".*";
|
||||
}
|
||||
|
||||
Bitfield constraint = (post != null && post.containsKey("constraint") && !post.get("constraint", "").isEmpty()) ? new Bitfield(4, post.get("constraint", "______")) : null;
|
||||
Bitfield constraint =
|
||||
(post != null && post.containsKey("constraint") && !post.get("constraint", "").isEmpty())
|
||||
? new Bitfield(4, post.get("constraint", "______"))
|
||||
: null;
|
||||
if ( indexof ) {
|
||||
constraint = new Bitfield(4);
|
||||
constraint.set(Condenser.flag_cat_indexof, true);
|
||||
}
|
||||
|
||||
// SEARCH
|
||||
final boolean clustersearch = sb.isRobinsonMode() && (sb.getConfig("cluster.mode", "").equals("privatecluster") || sb.getConfig("cluster.mode", "").equals("publiccluster"));
|
||||
final boolean indexReceiveGranted = sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true) || sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true) || clustersearch;
|
||||
final boolean clustersearch = sb.isRobinsonMode() && sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER);
|
||||
final boolean indexReceiveGranted =
|
||||
sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_ALLOW, true)
|
||||
|| sb.getConfigBool(SwitchboardConstants.INDEX_RECEIVE_AUTODISABLED, true)
|
||||
|| clustersearch;
|
||||
global = global && indexReceiveGranted; // if the user does not want indexes from remote peers, it cannot be a global searchnn
|
||||
|
||||
// increase search statistic counter
|
||||
|
@ -225,13 +253,18 @@ public class yacysearch {
|
|||
}
|
||||
|
||||
// find search domain
|
||||
final ContentDomain contentdom = ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
|
||||
final ContentDomain contentdom =
|
||||
ContentDomain.contentdomParser(post == null ? "text" : post.get("contentdom", "text"));
|
||||
|
||||
// patch until better search profiles are available
|
||||
if ( contentdom == ContentDomain.TEXT ) {
|
||||
if (maximumRecords > 50 && maximumRecords < 100) maximumRecords = 10;
|
||||
if ( maximumRecords > 50 && maximumRecords < 100 ) {
|
||||
maximumRecords = 10;
|
||||
}
|
||||
} else {
|
||||
if (maximumRecords <= 32) maximumRecords = 64;
|
||||
if ( maximumRecords <= 32 ) {
|
||||
maximumRecords = 64;
|
||||
}
|
||||
}
|
||||
|
||||
// check the search tracker
|
||||
|
@ -246,33 +279,67 @@ public class yacysearch {
|
|||
snippetFetchStrategy = null;
|
||||
}
|
||||
block = true;
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM "
|
||||
+ client
|
||||
+ " gets no permission to search");
|
||||
} else if ( Domains.matchesList(client, sb.networkWhitelist) ) {
|
||||
Log.logInfo("LOCAL_SEARCH", "ACCESS CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
|
||||
Log.logInfo("LOCAL_SEARCH", "ACCESS CONTROL: WHITELISTED CLIENT FROM "
|
||||
+ client
|
||||
+ " gets no search restrictions");
|
||||
} else if ( !authenticated && !localhostAccess ) {
|
||||
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
|
||||
synchronized ( trackerHandles ) {
|
||||
final int accInThreeSeconds = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size();
|
||||
final int accInOneMinute = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size();
|
||||
final int accInTenMinutes = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size();
|
||||
final int accInThreeSeconds =
|
||||
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size();
|
||||
final int accInOneMinute =
|
||||
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size();
|
||||
final int accInTenMinutes =
|
||||
trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size();
|
||||
// protections against too strong YaCy network load, reduces remote search
|
||||
if ( global ) {
|
||||
if ( accInTenMinutes >= 60 || accInOneMinute >= 6 || accInThreeSeconds >= 1 ) {
|
||||
global = false;
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed global search");
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
|
||||
+ client
|
||||
+ ": "
|
||||
+ accInThreeSeconds
|
||||
+ "/3s, "
|
||||
+ accInOneMinute
|
||||
+ "/60s, "
|
||||
+ accInTenMinutes
|
||||
+ "/600s, "
|
||||
+ " requests, disallowed global search");
|
||||
}
|
||||
}
|
||||
// protection against too many remote server snippet loads (protects traffic on server)
|
||||
if ( snippetFetchStrategy != null && snippetFetchStrategy.isAllowedToFetchOnline() ) {
|
||||
if ( accInTenMinutes >= 20 || accInOneMinute >= 4 || accInThreeSeconds >= 1 ) {
|
||||
snippetFetchStrategy = CacheStrategy.CACHEONLY;
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed remote snippet loading");
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
|
||||
+ client
|
||||
+ ": "
|
||||
+ accInThreeSeconds
|
||||
+ "/3s, "
|
||||
+ accInOneMinute
|
||||
+ "/60s, "
|
||||
+ accInTenMinutes
|
||||
+ "/600s, "
|
||||
+ " requests, disallowed remote snippet loading");
|
||||
}
|
||||
}
|
||||
// general load protection
|
||||
if ( accInTenMinutes >= 3000 || accInOneMinute >= 600 || accInThreeSeconds >= 60 ) {
|
||||
block = true;
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed search");
|
||||
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM "
|
||||
+ client
|
||||
+ ": "
|
||||
+ accInThreeSeconds
|
||||
+ "/3s, "
|
||||
+ accInOneMinute
|
||||
+ "/60s, "
|
||||
+ accInTenMinutes
|
||||
+ "/600s, "
|
||||
+ " requests, disallowed search");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -363,7 +430,9 @@ public class yacysearch {
|
|||
}
|
||||
String ft = querystring.substring(filetype + 9, ftb);
|
||||
querystring = querystring.replace("filetype:" + ft, "");
|
||||
while (!ft.isEmpty() && ft.charAt(0) == '.') ft = ft.substring(1);
|
||||
while ( !ft.isEmpty() && ft.charAt(0) == '.' ) {
|
||||
ft = ft.substring(1);
|
||||
}
|
||||
if ( !ft.isEmpty() ) {
|
||||
if ( urlmask == null ) {
|
||||
urlmask = ".*\\." + ft;
|
||||
|
@ -382,7 +451,9 @@ public class yacysearch {
|
|||
if ( tenant != null ) {
|
||||
if ( urlmask == null ) {
|
||||
urlmask = ".*" + tenant + ".*";
|
||||
} else urlmask = ".*" + tenant + urlmask;
|
||||
} else {
|
||||
urlmask = ".*" + tenant + urlmask;
|
||||
}
|
||||
}
|
||||
}
|
||||
final int site = querystring.indexOf("site:", 0);
|
||||
|
@ -484,7 +555,8 @@ public class yacysearch {
|
|||
}
|
||||
|
||||
// navigation
|
||||
final String navigation = (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
|
||||
final String navigation =
|
||||
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "");
|
||||
|
||||
// the query
|
||||
final TreeSet<String>[] query = QueryParams.cleanQuery(querystring.trim()); // converts also umlaute
|
||||
|
@ -515,7 +587,10 @@ public class yacysearch {
|
|||
map.put("urlhash", delHash);
|
||||
map.put("vote", "negative");
|
||||
map.put("refid", "");
|
||||
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_SURFTIPP_VOTE_ADD, map);
|
||||
sb.peers.newsPool.publishMyNews(
|
||||
sb.peers.mySeed(),
|
||||
NewsPool.CATEGORY_SURFTIPP_VOTE_ADD,
|
||||
map);
|
||||
}
|
||||
|
||||
// delete the search history since this still shows the entry
|
||||
|
@ -536,7 +611,12 @@ public class yacysearch {
|
|||
if ( urlentry != null ) {
|
||||
Document[] documents = null;
|
||||
try {
|
||||
documents = sb.loader.loadDocuments(sb.loader.request(urlentry.url(), true, false), CacheStrategy.IFEXIST, 5000, Integer.MAX_VALUE);
|
||||
documents =
|
||||
sb.loader.loadDocuments(
|
||||
sb.loader.request(urlentry.url(), true, false),
|
||||
CacheStrategy.IFEXIST,
|
||||
5000,
|
||||
Integer.MAX_VALUE);
|
||||
} catch ( final IOException e ) {
|
||||
} catch ( final Parser.Failure e ) {
|
||||
}
|
||||
|
@ -548,7 +628,10 @@ public class yacysearch {
|
|||
map.put("description", documents[0].dc_title().replace(',', ' '));
|
||||
map.put("author", documents[0].dc_creator());
|
||||
map.put("tags", documents[0].dc_subject(' '));
|
||||
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_SURFTIPP_ADD, map);
|
||||
sb.peers.newsPool.publishMyNews(
|
||||
sb.peers.mySeed(),
|
||||
NewsPool.CATEGORY_SURFTIPP_ADD,
|
||||
map);
|
||||
documents[0].close();
|
||||
}
|
||||
}
|
||||
|
@ -564,7 +647,13 @@ public class yacysearch {
|
|||
final URIMetadataRow urlentry = indexSegment.urlMetadata().load(UTF8.getBytes(bookmarkHash));
|
||||
if ( urlentry != null ) {
|
||||
try {
|
||||
sb.tables.bookmarks.createBookmark(sb.loader, urlentry.url(), YMarkTables.USER_ADMIN, true, "searchresult", "/search");
|
||||
sb.tables.bookmarks.createBookmark(
|
||||
sb.loader,
|
||||
urlentry.url(),
|
||||
YMarkTables.USER_ADMIN,
|
||||
true,
|
||||
"searchresult",
|
||||
"/search");
|
||||
} catch ( final Throwable e ) {
|
||||
}
|
||||
}
|
||||
|
@ -593,7 +682,8 @@ public class yacysearch {
|
|||
prefermask = "";
|
||||
}
|
||||
|
||||
final QueryParams theQuery = new QueryParams(
|
||||
final QueryParams theQuery =
|
||||
new QueryParams(
|
||||
originalquerystring,
|
||||
queryHashes,
|
||||
Word.words2hashesHandles(query[1]),
|
||||
|
@ -610,8 +700,9 @@ public class yacysearch {
|
|||
maximumRecords,
|
||||
startRecord,
|
||||
urlmask,
|
||||
clustersearch && global ? QueryParams.Searchdom.CLUSTER :
|
||||
(global && indexReceiveGranted ? QueryParams.Searchdom.GLOBAL : QueryParams.Searchdom.LOCAL),
|
||||
clustersearch && global ? QueryParams.Searchdom.CLUSTER : (global && indexReceiveGranted
|
||||
? QueryParams.Searchdom.GLOBAL
|
||||
: QueryParams.Searchdom.LOCAL),
|
||||
20,
|
||||
constraint,
|
||||
true,
|
||||
|
@ -624,9 +715,16 @@ public class yacysearch {
|
|||
indexSegment,
|
||||
ranking,
|
||||
header.get(RequestHeader.USER_AGENT, ""),
|
||||
sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false) && sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false) && sb.peers.mySeed().getFlagAcceptRemoteIndex());
|
||||
sb.getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, false)
|
||||
&& sb.getConfigBool(SwitchboardConstants.NETWORK_SEARCHVERIFY, false)
|
||||
&& sb.peers.mySeed().getFlagAcceptRemoteIndex());
|
||||
EventTracker.delete(EventTracker.EClass.SEARCH);
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(theQuery.id(true), SearchEvent.Type.INITIALIZATION, "", 0, 0), false);
|
||||
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(
|
||||
theQuery.id(true),
|
||||
SearchEvent.Type.INITIALIZATION,
|
||||
"",
|
||||
0,
|
||||
0), false);
|
||||
|
||||
// tell all threads to do nothing for a specific time
|
||||
sb.intermissionAllThreads(3000);
|
||||
|
@ -635,8 +733,19 @@ public class yacysearch {
|
|||
theQuery.filterOut(Switchboard.blueList);
|
||||
|
||||
// log
|
||||
Log.logInfo("LOCAL_SEARCH", "INIT WORD SEARCH: " + theQuery.queryString + ":" + QueryParams.hashSet2hashString(theQuery.queryHashes) + " - " + theQuery.neededResults() + " links to be computed, " + theQuery.displayResults() + " lines to be displayed");
|
||||
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(new RSSMessage("Local Search Request", theQuery.queryString, ""));
|
||||
Log.logInfo(
|
||||
"LOCAL_SEARCH",
|
||||
"INIT WORD SEARCH: "
|
||||
+ theQuery.queryString
|
||||
+ ":"
|
||||
+ QueryParams.hashSet2hashString(theQuery.queryHashes)
|
||||
+ " - "
|
||||
+ theQuery.neededResults()
|
||||
+ " links to be computed, "
|
||||
+ theQuery.displayResults()
|
||||
+ " lines to be displayed");
|
||||
EventChannel.channels(EventChannel.LOCALSEARCH).addMessage(
|
||||
new RSSMessage("Local Search Request", theQuery.queryString, ""));
|
||||
final long timestamp = System.currentTimeMillis();
|
||||
|
||||
// create a new search event
|
||||
|
@ -644,10 +753,20 @@ public class yacysearch {
|
|||
theQuery.setOffset(0); // in case that this is a new search, always start without an offset
|
||||
startRecord = 0;
|
||||
}
|
||||
final SearchEvent theSearch = SearchEventCache.getEvent(
|
||||
theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader,
|
||||
(int) sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 10)),
|
||||
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_USER, sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)),
|
||||
final SearchEvent theSearch =
|
||||
SearchEventCache.getEvent(
|
||||
theQuery,
|
||||
sb.peers,
|
||||
sb.tables,
|
||||
(sb.isRobinsonMode()) ? sb.clusterhashes : null,
|
||||
false,
|
||||
sb.loader,
|
||||
(int) sb.getConfigLong(
|
||||
SwitchboardConstants.REMOTESEARCH_MAXCOUNT_USER,
|
||||
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXCOUNT_DEFAULT, 10)),
|
||||
sb.getConfigLong(
|
||||
SwitchboardConstants.REMOTESEARCH_MAXTIME_USER,
|
||||
sb.getConfigLong(SwitchboardConstants.REMOTESEARCH_MAXTIME_DEFAULT, 3000)),
|
||||
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0),
|
||||
(int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
|
||||
|
||||
|
@ -655,7 +774,8 @@ public class yacysearch {
|
|||
if ( sitehost != null && sb.getConfigBool("heuristic.site", false) && authenticated ) {
|
||||
sb.heuristicSite(theSearch, sitehost);
|
||||
}
|
||||
if ((heuristicScroogle >= 0 || sb.getConfigBool("heuristic.scroogle", false)) && authenticated) {
|
||||
if ( (heuristicScroogle >= 0 || sb.getConfigBool("heuristic.scroogle", false))
|
||||
&& authenticated ) {
|
||||
sb.heuristicScroogle(theSearch);
|
||||
}
|
||||
if ( (heuristicBlekko >= 0 || sb.getConfigBool("heuristic.blekko", false)) && authenticated ) {
|
||||
|
@ -664,15 +784,30 @@ public class yacysearch {
|
|||
}
|
||||
|
||||
// log
|
||||
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: " + theQuery.queryString + " - " +
|
||||
"local-unfiltered(" + theSearch.getRankingResult().getLocalIndexCount() + "), " +
|
||||
"local_miss(" + theSearch.getRankingResult().getMissCount() + "), " +
|
||||
"local_sortout(" + theSearch.getRankingResult().getSortOutCount() + "), " +
|
||||
"remote(" + theSearch.getRankingResult().getRemoteResourceSize() + ") links found, " +
|
||||
(System.currentTimeMillis() - timestamp) + " ms");
|
||||
Log.logInfo("LOCAL_SEARCH", "EXIT WORD SEARCH: "
|
||||
+ theQuery.queryString
|
||||
+ " - "
|
||||
+ "local-unfiltered("
|
||||
+ theSearch.getRankingResult().getLocalIndexCount()
|
||||
+ "), "
|
||||
+ "local_miss("
|
||||
+ theSearch.getRankingResult().getMissCount()
|
||||
+ "), "
|
||||
+ "local_sortout("
|
||||
+ theSearch.getRankingResult().getSortOutCount()
|
||||
+ "), "
|
||||
+ "remote("
|
||||
+ theSearch.getRankingResult().getRemoteResourceSize()
|
||||
+ ") links found, "
|
||||
+ (System.currentTimeMillis() - timestamp)
|
||||
+ " ms");
|
||||
|
||||
// prepare search statistics
|
||||
theQuery.resultcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
|
||||
theQuery.resultcount =
|
||||
theSearch.getRankingResult().getLocalIndexCount()
|
||||
- theSearch.getRankingResult().getMissCount()
|
||||
- theSearch.getRankingResult().getSortOutCount()
|
||||
+ theSearch.getRankingResult().getRemoteIndexCount();
|
||||
theQuery.searchtime = System.currentTimeMillis() - timestamp;
|
||||
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
|
||||
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
|
||||
|
@ -683,16 +818,23 @@ public class yacysearch {
|
|||
|
||||
prop.put("meanCount", meanMax);
|
||||
if ( meanMax > 0 && !json && !rss ) {
|
||||
final DidYouMean didYouMean = new DidYouMean(indexSegment.termIndex(), new StringBuilder(querystring));
|
||||
final DidYouMean didYouMean =
|
||||
new DidYouMean(indexSegment.termIndex(), new StringBuilder(querystring));
|
||||
final Iterator<StringBuilder> meanIt = didYouMean.getSuggestions(100, 5).iterator();
|
||||
int meanCount = 0;
|
||||
String suggestion;
|
||||
while ( meanCount < meanMax && meanIt.hasNext() ) {
|
||||
suggestion = meanIt.next().toString();
|
||||
prop.put("didYouMean_suggestions_" + meanCount + "_word", suggestion);
|
||||
prop.put("didYouMean_suggestions_"+meanCount+"_url",
|
||||
QueryParams.navurl("html", 0, theQuery, suggestion, originalUrlMask.toString(), theQuery.navigators).toString()
|
||||
);
|
||||
prop.put(
|
||||
"didYouMean_suggestions_" + meanCount + "_url",
|
||||
QueryParams.navurl(
|
||||
"html",
|
||||
0,
|
||||
theQuery,
|
||||
suggestion,
|
||||
originalUrlMask.toString(),
|
||||
theQuery.navigators).toString());
|
||||
prop.put("didYouMean_suggestions_" + meanCount + "_sep", "|");
|
||||
meanCount++;
|
||||
}
|
||||
|
@ -714,7 +856,9 @@ public class yacysearch {
|
|||
prop.put("geoinfo_loc_" + i + "_lat", Math.round(c.lat() * 10000.0f) / 10000.0f);
|
||||
prop.put("geoinfo_loc_" + i + "_name", c.getName());
|
||||
i++;
|
||||
if (i >= 10) break;
|
||||
if ( i >= 10 ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
prop.put("geoinfo_loc", i);
|
||||
prop.put("geoinfo", "1");
|
||||
|
@ -725,39 +869,71 @@ public class yacysearch {
|
|||
synchronized ( trackerHandles ) {
|
||||
trackerHandles.add(theQuery.time);
|
||||
while ( trackerHandles.size() > 600 ) {
|
||||
if (!trackerHandles.remove(trackerHandles.first())) break;
|
||||
if ( !trackerHandles.remove(trackerHandles.first()) ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
sb.localSearchTracker.put(client, trackerHandles);
|
||||
if ( sb.localSearchTracker.size() > 100 ) {
|
||||
sb.localSearchTracker.remove(sb.localSearchTracker.keys().nextElement());
|
||||
}
|
||||
if (MemoryControl.shortStatus()) sb.localSearchTracker.clear();
|
||||
if ( MemoryControl.shortStatus() ) {
|
||||
sb.localSearchTracker.clear();
|
||||
}
|
||||
} catch ( final Exception e ) {
|
||||
Log.logException(e);
|
||||
}
|
||||
|
||||
final int indexcount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount() - theSearch.getRankingResult().getSortOutCount() + theSearch.getRankingResult().getRemoteIndexCount();
|
||||
final int indexcount =
|
||||
theSearch.getRankingResult().getLocalIndexCount()
|
||||
- theSearch.getRankingResult().getMissCount()
|
||||
- theSearch.getRankingResult().getSortOutCount()
|
||||
+ theSearch.getRankingResult().getRemoteIndexCount();
|
||||
prop.put("num-results_offset", startRecord == 0 ? 0 : startRecord + 1);
|
||||
prop.put("num-results_itemscount", Formatter.number(startRecord + theSearch.getQuery().itemsPerPage > indexcount ? startRecord + indexcount % theSearch.getQuery().itemsPerPage : startRecord + theSearch.getQuery().itemsPerPage, true));
|
||||
prop.put("num-results_itemscount", Formatter.number(
|
||||
startRecord + theSearch.getQuery().itemsPerPage > indexcount ? startRecord
|
||||
+ indexcount
|
||||
% theSearch.getQuery().itemsPerPage : startRecord + theSearch.getQuery().itemsPerPage,
|
||||
true));
|
||||
prop.put("num-results_itemsPerPage", maximumRecords);
|
||||
prop.put("num-results_totalcount", Formatter.number(indexcount, true));
|
||||
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch) ? "1" : "0");
|
||||
prop.put("num-results_globalresults_localResourceSize", Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
|
||||
prop.put("num-results_globalresults_localMissCount", Formatter.number(theSearch.getRankingResult().getMissCount(), true));
|
||||
prop.put("num-results_globalresults_remoteResourceSize", Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
|
||||
prop.put("num-results_globalresults_remoteIndexCount", Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
|
||||
prop.put("num-results_globalresults_remotePeerCount", Formatter.number(theSearch.getRankingResult().getRemotePeerCount(), true));
|
||||
prop.put("num-results_globalresults", global && (indexReceiveGranted || clustersearch)
|
||||
? "1"
|
||||
: "0");
|
||||
prop.put(
|
||||
"num-results_globalresults_localResourceSize",
|
||||
Formatter.number(theSearch.getRankingResult().getLocalIndexCount(), true));
|
||||
prop.put(
|
||||
"num-results_globalresults_localMissCount",
|
||||
Formatter.number(theSearch.getRankingResult().getMissCount(), true));
|
||||
prop.put(
|
||||
"num-results_globalresults_remoteResourceSize",
|
||||
Formatter.number(theSearch.getRankingResult().getRemoteResourceSize(), true));
|
||||
prop.put(
|
||||
"num-results_globalresults_remoteIndexCount",
|
||||
Formatter.number(theSearch.getRankingResult().getRemoteIndexCount(), true));
|
||||
prop.put(
|
||||
"num-results_globalresults_remotePeerCount",
|
||||
Formatter.number(theSearch.getRankingResult().getRemotePeerCount(), true));
|
||||
|
||||
// compose page navigation
|
||||
final StringBuilder resnav = new StringBuilder(200);
|
||||
final int thispage = startRecord / theQuery.displayResults();
|
||||
if ( thispage == 0 ) {
|
||||
resnav.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /> ");
|
||||
resnav
|
||||
.append("<img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /> ");
|
||||
} else {
|
||||
resnav.append("<a id=\"prevpage\" href=\"");
|
||||
resnav.append(QueryParams.navurl("html", thispage - 1, theQuery, null, originalUrlMask, navigation).toString());
|
||||
resnav.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a> ");
|
||||
resnav.append(QueryParams.navurl(
|
||||
"html",
|
||||
thispage - 1,
|
||||
theQuery,
|
||||
null,
|
||||
originalUrlMask,
|
||||
navigation).toString());
|
||||
resnav
|
||||
.append("\"><img src=\"env/grafics/navdl.gif\" alt=\"arrowleft\" width=\"16\" height=\"16\" /></a> ");
|
||||
}
|
||||
final int numberofpages = Math.min(10, 1 + ((indexcount - 1) / theQuery.displayResults()));
|
||||
|
||||
|
@ -770,7 +946,9 @@ public class yacysearch {
|
|||
resnav.append("\" width=\"16\" height=\"16\" /> ");
|
||||
} else {
|
||||
resnav.append("<a href=\"");
|
||||
resnav.append(QueryParams.navurl("html", i, theQuery, null, originalUrlMask, navigation).toString());
|
||||
resnav.append(QueryParams
|
||||
.navurl("html", i, theQuery, null, originalUrlMask, navigation)
|
||||
.toString());
|
||||
resnav.append("\"><img src=\"env/grafics/navd");
|
||||
resnav.append(i + 1);
|
||||
resnav.append(".gif\" alt=\"page");
|
||||
|
@ -779,11 +957,19 @@ public class yacysearch {
|
|||
}
|
||||
}
|
||||
if ( thispage >= numberofpages ) {
|
||||
resnav.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
|
||||
resnav
|
||||
.append("<img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" />");
|
||||
} else {
|
||||
resnav.append("<a id=\"nextpage\" href=\"");
|
||||
resnav.append(QueryParams.navurl("html", thispage + 1, theQuery, null, originalUrlMask, navigation).toString());
|
||||
resnav.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
|
||||
resnav.append(QueryParams.navurl(
|
||||
"html",
|
||||
thispage + 1,
|
||||
theQuery,
|
||||
null,
|
||||
originalUrlMask,
|
||||
navigation).toString());
|
||||
resnav
|
||||
.append("\"><img src=\"env/grafics/navdr.gif\" alt=\"arrowright\" width=\"16\" height=\"16\" /></a>");
|
||||
}
|
||||
final String resnavs = resnav.toString();
|
||||
prop.put("num-results_resnav", resnavs);
|
||||
|
@ -796,7 +982,12 @@ public class yacysearch {
|
|||
prop.put("results_" + i + "_eventID", theQuery.id(false));
|
||||
}
|
||||
prop.put("results", theQuery.displayResults());
|
||||
prop.put("resultTable", (contentdom == ContentDomain.APP || contentdom == ContentDomain.AUDIO || contentdom == ContentDomain.VIDEO) ? 1 : 0);
|
||||
prop
|
||||
.put(
|
||||
"resultTable",
|
||||
(contentdom == ContentDomain.APP || contentdom == ContentDomain.AUDIO || contentdom == ContentDomain.VIDEO)
|
||||
? 1
|
||||
: 0);
|
||||
prop.put("eventID", theQuery.id(false)); // for bottomline
|
||||
|
||||
// process result of search
|
||||
|
@ -838,10 +1029,20 @@ public class yacysearch {
|
|||
prop.putHTML("prefermaskfilter", prefermask);
|
||||
prop.put("indexof", (indexof) ? "on" : "off");
|
||||
prop.put("constraint", (constraint == null) ? "" : constraint.exportB64());
|
||||
prop.put("search.verify", snippetFetchStrategy == null ? sb.getConfig("search.verify", "iffresh") : snippetFetchStrategy.toName());
|
||||
prop.put("search.navigation", (post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
|
||||
prop.put("search.verify", snippetFetchStrategy == null
|
||||
? sb.getConfig("search.verify", "iffresh")
|
||||
: snippetFetchStrategy.toName());
|
||||
prop.put(
|
||||
"search.navigation",
|
||||
(post == null) ? sb.getConfig("search.navigation", "all") : post.get("nav", "all"));
|
||||
prop.put("contentdom", (post == null ? "text" : post.get("contentdom", "text")));
|
||||
prop.put("searchdomswitches", sb.getConfigBool("search.text", true) || sb.getConfigBool("search.audio", true) || sb.getConfigBool("search.video", true) || sb.getConfigBool("search.image", true) || sb.getConfigBool("search.app", true) ? 1 : 0);
|
||||
prop.put(
|
||||
"searchdomswitches",
|
||||
sb.getConfigBool("search.text", true)
|
||||
|| sb.getConfigBool("search.audio", true)
|
||||
|| sb.getConfigBool("search.video", true)
|
||||
|| sb.getConfigBool("search.image", true)
|
||||
|| sb.getConfigBool("search.app", true) ? 1 : 0);
|
||||
prop.put("searchdomswitches_searchtext", sb.getConfigBool("search.text", true) ? 1 : 0);
|
||||
prop.put("searchdomswitches_searchaudio", sb.getConfigBool("search.audio", true) ? 1 : 0);
|
||||
prop.put("searchdomswitches_searchvideo", sb.getConfigBool("search.video", true) ? 1 : 0);
|
||||
|
|
319
skins/28c3.css
Normal file
319
skins/28c3.css
Normal file
|
@ -0,0 +1,319 @@
|
|||
/* generic skin */
|
||||
|
||||
/*
|
||||
The following colours must be defined:
|
||||
#000000
|
||||
#A3CC8B
|
||||
#38A535
|
||||
#E08040
|
||||
#333333
|
||||
#222222
|
||||
#FFCCCC
|
||||
#888888
|
||||
#990000
|
||||
#009900
|
||||
#000099
|
||||
#FFFFFF
|
||||
#008000
|
||||
#800000
|
||||
*/
|
||||
|
||||
body {
|
||||
background-color:#000000;
|
||||
color:#A3CC8B;
|
||||
}
|
||||
|
||||
a:link {
|
||||
color:#A3CC8B;
|
||||
background-color:transparent;
|
||||
}
|
||||
|
||||
a:link:hover {
|
||||
color: #38A535;
|
||||
background-color:transparent;
|
||||
}
|
||||
|
||||
|
||||
/* Menu */
|
||||
|
||||
.menugroup h3 {
|
||||
-webkit-border-top-left-radius: 5px;
|
||||
-webkit-border-top-right-radius: 5px;
|
||||
-khtml-border-top-left-radius: 5px;
|
||||
-khtml-border-top-right-radius: 5px;
|
||||
-moz-border-radius-topleft: 5px;
|
||||
-moz-border-radius-topright: 5px;
|
||||
border-top-left-radius: 5px;
|
||||
border-top-right-radius: 5px;
|
||||
|
||||
background-color: #E08040;
|
||||
color:white;
|
||||
}
|
||||
|
||||
.SubMenugroup h3, .SubMenu h3 {
|
||||
-webkit-border-radius: 5px;
|
||||
-khtml-border-radius: 5px;
|
||||
-moz-border-radius: 5px;
|
||||
border-radius: 5px;
|
||||
|
||||
background-color:#E08040;
|
||||
color:white;
|
||||
}
|
||||
|
||||
a.MenuItemLink, ul.SubMenu em {
|
||||
background-color:#333333;
|
||||
color:#A3CC8B;
|
||||
}
|
||||
|
||||
a:hover.MenuItemLink {
|
||||
background-color:transparent;
|
||||
color:#A3CC8B;
|
||||
}
|
||||
|
||||
/* Head */
|
||||
|
||||
div.head h1 {
|
||||
background-color:transparent;
|
||||
text-align:left;
|
||||
padding-left:70px;
|
||||
color:#A3CC8B;
|
||||
}
|
||||
|
||||
/* Tables */
|
||||
|
||||
table {
|
||||
}
|
||||
|
||||
.TableHeader {
|
||||
background-color: #38A535;
|
||||
color:white;
|
||||
}
|
||||
|
||||
.TableCellDark {
|
||||
background-color: #333333;
|
||||
}
|
||||
|
||||
.TableCellLight {
|
||||
background-color: #222222;
|
||||
}
|
||||
|
||||
.TableCellSummary {
|
||||
background-color: #FFCCCC;
|
||||
border:1px solid #888888;
|
||||
}
|
||||
|
||||
.TableCellActive {
|
||||
background-color: #FFCCCC;
|
||||
}
|
||||
|
||||
/* Blog and Wiki*/
|
||||
|
||||
.Post {
|
||||
background-color:#000000;
|
||||
}
|
||||
|
||||
.PostSubject {
|
||||
background-color:#000000;
|
||||
}
|
||||
|
||||
.PostSubject a {
|
||||
color:#A3CC8B;
|
||||
}
|
||||
|
||||
.PostInfo {
|
||||
background-color:#000000;
|
||||
}
|
||||
|
||||
/* Wiki */
|
||||
|
||||
.WikiTOCBox {
|
||||
border: 1px solid #888888;
|
||||
background-color: #000000;
|
||||
}
|
||||
|
||||
a.unknown {
|
||||
color:#990000;
|
||||
}
|
||||
|
||||
a.known {
|
||||
color:#009900;
|
||||
}
|
||||
|
||||
a.extern {
|
||||
color:#000099;
|
||||
}
|
||||
|
||||
/* in Bookmarks */
|
||||
|
||||
.bookmark {
|
||||
border-bottom:1px #888888 dashed;
|
||||
}
|
||||
|
||||
a.bookmarkTitle {
|
||||
color: #E08040;
|
||||
}
|
||||
|
||||
a:hover.bookmarkTitle {
|
||||
color: #E08040;
|
||||
}
|
||||
|
||||
a.bookmarkTags {
|
||||
color: #FFFFFF;
|
||||
}
|
||||
|
||||
a:hover.bookmarkTags {
|
||||
color: #E08040;
|
||||
}
|
||||
|
||||
a.bookmarkAction {
|
||||
color: #888888;
|
||||
}
|
||||
|
||||
.Tags {
|
||||
border-left: 2px solid #A3CC8B;
|
||||
}
|
||||
|
||||
.diff { background-color: #000000; }
|
||||
.diff .unchanged { color: #000099; }
|
||||
.diff .added { color: #009900; background-color: #000000; }
|
||||
.diff .deleted { color: #990000; background-color: #000000; }
|
||||
|
||||
/* in Status.html */
|
||||
|
||||
.ProgressBar {
|
||||
border: #000000 solid 1px;
|
||||
}
|
||||
div.ProgressBarFill {
|
||||
background-color:#333333;
|
||||
}
|
||||
|
||||
|
||||
/* Copyright info */
|
||||
|
||||
div#api {
|
||||
position:absolute;
|
||||
top:3px;
|
||||
right:20px;
|
||||
z-index: 100;
|
||||
}
|
||||
|
||||
div#yacylivesearch {
|
||||
float:right;
|
||||
margin-right: 90px;
|
||||
margin-top: -24px;
|
||||
}
|
||||
|
||||
/* Searchresults */
|
||||
|
||||
fieldset.maininput, fieldset.yacys {
|
||||
background-color:#333333;
|
||||
-webkit-border-radius: 5px;
|
||||
-khtml-border-radius: 5px;
|
||||
-moz-border-radius: 5px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
form.search.small h2 {
|
||||
border-bottom:1px solid #888888;
|
||||
}
|
||||
|
||||
.searchresults {
|
||||
/*border-top:1px #888888 dashed;*/
|
||||
}
|
||||
|
||||
.searchresults h4 a {
|
||||
font-size:1.2em;
|
||||
font-weight:normal;
|
||||
text-decoration:underline;
|
||||
color:#FFFFFF;
|
||||
}
|
||||
.searchresults h4 a:link:hover {
|
||||
color:#FFFFFF;
|
||||
}
|
||||
|
||||
.snippetLoaded strong {
|
||||
color:black;
|
||||
}
|
||||
|
||||
.searchresults .url a {
|
||||
color:#008000;
|
||||
}
|
||||
.searchresults .url a:link:hover {
|
||||
color:#800000;
|
||||
}
|
||||
|
||||
|
||||
/* other */
|
||||
|
||||
.settingsValue {
|
||||
color:#000099;
|
||||
}
|
||||
|
||||
.Headline {
|
||||
background-color: #E08040;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.Heading {
|
||||
background-color: #E08040;
|
||||
}
|
||||
|
||||
.error, .warning {
|
||||
color:red;
|
||||
}
|
||||
|
||||
.success {
|
||||
color:green;
|
||||
}
|
||||
|
||||
.Message {
|
||||
background-color: #000000;
|
||||
}
|
||||
|
||||
.example {
|
||||
background-color:#000099;
|
||||
}
|
||||
|
||||
.hides:hover .hoverShow { background-color: #000000; }
|
||||
|
||||
/* Log */
|
||||
|
||||
body#ViewLog pre {
|
||||
background-color:white;
|
||||
}
|
||||
|
||||
/* Forms */
|
||||
|
||||
fieldset {
|
||||
background-color:#333333;
|
||||
color:#A3CC8B;
|
||||
border:0px solid #333333;
|
||||
-webkit-border-radius: 5px;
|
||||
-khtml-border-radius: 5px;
|
||||
-moz-border-radius: 5px;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
/* input, select, textarea, button {
|
||||
color: black;
|
||||
background-color: white;
|
||||
} */
|
||||
|
||||
legend {
|
||||
background-color:#38A535;
|
||||
-webkit-border-radius: 5px;
|
||||
-khtml-border-radius: 5px;
|
||||
-moz-border-radius: 5px;
|
||||
border-radius: 5px;
|
||||
text-align:left;
|
||||
color:white;
|
||||
}
|
||||
|
||||
form dt, dl.pairs dt {
|
||||
background-color:#333333;
|
||||
font-weight:bold;
|
||||
}
|
||||
|
||||
form dd, dl.pairs dd {
|
||||
background-color:#333333;
|
||||
}
|
|
@ -142,11 +142,19 @@ public class CrawlQueues {
|
|||
* @return if the hash exists, the name of the database is returned, otherwise null is returned
|
||||
*/
|
||||
public String urlExists(final byte[] hash) {
|
||||
if (this.delegatedURL.exists(hash)) return "delegated";
|
||||
if (this.errorURL.exists(hash)) return "errors";
|
||||
if (this.noticeURL.existsInStack(hash)) return "crawler";
|
||||
if (this.delegatedURL.exists(hash)) {
|
||||
return "delegated";
|
||||
}
|
||||
if (this.errorURL.exists(hash)) {
|
||||
return "errors";
|
||||
}
|
||||
if (this.noticeURL.existsInStack(hash)) {
|
||||
return "crawler";
|
||||
}
|
||||
for (final Loader worker: this.workers.values()) {
|
||||
if (Base64Order.enhancedCoder.equal(worker.request.url().hash(), hash)) return "worker";
|
||||
if (Base64Order.enhancedCoder.equal(worker.request.url().hash(), hash)) {
|
||||
return "worker";
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -159,16 +167,26 @@ public class CrawlQueues {
|
|||
|
||||
public DigestURI getURL(final byte[] urlhash) {
|
||||
assert urlhash != null;
|
||||
if (urlhash == null || urlhash.length == 0) return null;
|
||||
if (urlhash == null || urlhash.length == 0) {
|
||||
return null;
|
||||
}
|
||||
ZURL.Entry ee = this.delegatedURL.get(urlhash);
|
||||
if (ee != null) return ee.url();
|
||||
if (ee != null) {
|
||||
return ee.url();
|
||||
}
|
||||
ee = this.errorURL.get(urlhash);
|
||||
if (ee != null) return ee.url();
|
||||
if (ee != null) {
|
||||
return ee.url();
|
||||
}
|
||||
for (final Loader w: this.workers.values()) {
|
||||
if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) return w.request.url();
|
||||
if (Base64Order.enhancedCoder.equal(w.request.url().hash(), urlhash)) {
|
||||
return w.request.url();
|
||||
}
|
||||
}
|
||||
final Request ne = this.noticeURL.get(urlhash);
|
||||
if (ne != null) return ne.url();
|
||||
if (ne != null) {
|
||||
return ne.url();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -176,7 +194,9 @@ public class CrawlQueues {
|
|||
// wait for all workers to finish
|
||||
final int timeout = (int) this.sb.getConfigLong("crawler.clientTimeout", 10000);
|
||||
for (final Loader w: this.workers.values()) {
|
||||
if (w.age() > timeout) w.interrupt();
|
||||
if (w.age() > timeout) {
|
||||
w.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -185,7 +205,9 @@ public class CrawlQueues {
|
|||
final Request[] e = new Request[this.workers.size()];
|
||||
int i = 0;
|
||||
for (final Loader w: this.workers.values()) {
|
||||
if (i >= e.length) break;
|
||||
if (i >= e.length) {
|
||||
break;
|
||||
}
|
||||
e[i++] = w.request;
|
||||
}
|
||||
return e;
|
||||
|
@ -197,10 +219,8 @@ public class CrawlQueues {
|
|||
}
|
||||
|
||||
public boolean coreCrawlJob() {
|
||||
|
||||
final boolean robinsonPrivateCase = (this.sb.isRobinsonMode() &&
|
||||
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER) &&
|
||||
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_CLUSTER));
|
||||
!this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PUBLIC_CLUSTER));
|
||||
|
||||
if ((robinsonPrivateCase || coreCrawlJobSize() <= 20) && limitCrawlJobSize() > 0) {
|
||||
// move some tasks to the core crawl job so we have something to do
|
||||
|
@ -216,12 +236,16 @@ public class CrawlQueues {
|
|||
final String queueCheckCore = loadIsPossible(NoticedURL.StackType.CORE);
|
||||
final String queueCheckNoload = loadIsPossible(NoticedURL.StackType.NOLOAD);
|
||||
if (queueCheckCore != null && queueCheckNoload != null) {
|
||||
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("omitting de-queue/local: " + queueCheckCore + ":" + queueCheckNoload);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isPaused(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL)) {
|
||||
if (this.log.isFine()) this.log.logFine("omitting de-queue/local: paused");
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("omitting de-queue/local: paused");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -238,7 +262,9 @@ public class CrawlQueues {
|
|||
if (this.noticeURL.stackSize(NoticedURL.StackType.NOLOAD) > 0) {
|
||||
// get one entry that will not be loaded, just indexed
|
||||
urlEntry = this.noticeURL.pop(NoticedURL.StackType.NOLOAD, true, this.sb.crawler);
|
||||
if (urlEntry == null) continue;
|
||||
if (urlEntry == null) {
|
||||
continue;
|
||||
}
|
||||
final String profileHandle = urlEntry.profileHandle();
|
||||
if (profileHandle == null) {
|
||||
this.log.logSevere(stats + ": NULL PROFILE HANDLE '" + urlEntry.profileHandle() + "' for URL " + urlEntry.url());
|
||||
|
@ -259,7 +285,9 @@ public class CrawlQueues {
|
|||
}
|
||||
|
||||
urlEntry = this.noticeURL.pop(NoticedURL.StackType.CORE, true, this.sb.crawler);
|
||||
if (urlEntry == null) continue;
|
||||
if (urlEntry == null) {
|
||||
continue;
|
||||
}
|
||||
final String profileHandle = urlEntry.profileHandle();
|
||||
// System.out.println("DEBUG plasmaSwitchboard.processCrawling:
|
||||
// profileHandle = " + profileHandle + ", urlEntry.url = " + urlEntry.url());
|
||||
|
@ -271,7 +299,9 @@ public class CrawlQueues {
|
|||
return true;
|
||||
} catch (final IOException e) {
|
||||
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
|
||||
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.CORE);
|
||||
if (e.getMessage().indexOf("hash is null",0) > 0) {
|
||||
this.noticeURL.clear(NoticedURL.StackType.CORE);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
@ -293,7 +323,7 @@ public class CrawlQueues {
|
|||
final DigestURI url = urlEntry.url();
|
||||
final String urlProtocol = url.getProtocol();
|
||||
if (this.sb.loader.isSupportedProtocol(urlProtocol)) {
|
||||
if (this.log.isFine())
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine(stats + ": URL=" + urlEntry.url()
|
||||
+ ", initiator=" + ((urlEntry.initiator() == null) ? "" : ASCII.String(urlEntry.initiator()))
|
||||
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
|
||||
|
@ -302,6 +332,7 @@ public class CrawlQueues {
|
|||
+ ", must-match=" + profile.urlMustMatchPattern().toString()
|
||||
+ ", must-not-match=" + profile.urlMustNotMatchPattern().toString()
|
||||
+ ", permission=" + ((this.sb.peers == null) ? "undefined" : (((this.sb.peers.mySeed().isSenior()) || (this.sb.peers.mySeed().isPrincipal())) ? "true" : "false")));
|
||||
}
|
||||
|
||||
// work off one Crawl stack entry
|
||||
if (urlEntry == null || urlEntry.url() == null) {
|
||||
|
@ -387,23 +418,31 @@ public class CrawlQueues {
|
|||
}
|
||||
// check again
|
||||
if (this.workers.size() >= this.sb.getConfigLong(SwitchboardConstants.CRAWLER_THREADS_ACTIVE_MAX, 20)) {
|
||||
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("remoteCrawlLoaderJob: too many processes in loader queue, dismissed (" + "cacheLoader=" + this.workers.size() + "), httpClients = " + ConnectionInfo.getCount());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
final String cautionCause = this.sb.onlineCaution();
|
||||
if (cautionCause != null) {
|
||||
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("remoteCrawlLoaderJob: online caution for " + cautionCause + ", omitting processing");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (remoteTriggeredCrawlJobSize() > 200) {
|
||||
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("remoteCrawlLoaderJob: the remote-triggered crawl job queue is filled, omitting processing");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (coreCrawlJobSize() > 0 /*&& sb.indexingStorageProcessor.queueSize() > 0*/) {
|
||||
if (this.log.isFine()) this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
|
||||
if (this.log.isFine()) {
|
||||
this.log.logFine("remoteCrawlLoaderJob: a local crawl is running, omitting processing");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -414,27 +453,37 @@ public class CrawlQueues {
|
|||
final Iterator<Seed> e = PeerSelection.getProvidesRemoteCrawlURLs(this.sb.peers);
|
||||
while (e.hasNext()) {
|
||||
seed = e.next();
|
||||
if (seed != null) this.remoteCrawlProviderHashes.add(seed.hash);
|
||||
if (seed != null) {
|
||||
this.remoteCrawlProviderHashes.add(seed.hash);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (this.remoteCrawlProviderHashes.isEmpty()) return false;
|
||||
}
|
||||
if (this.remoteCrawlProviderHashes.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// take one entry from the provider list and load the entries from the remote peer
|
||||
seed = null;
|
||||
String hash = null;
|
||||
while (seed == null && !this.remoteCrawlProviderHashes.isEmpty()) {
|
||||
hash = this.remoteCrawlProviderHashes.remove(this.remoteCrawlProviderHashes.size() - 1);
|
||||
if (hash == null) continue;
|
||||
if (hash == null) {
|
||||
continue;
|
||||
}
|
||||
seed = this.sb.peers.get(hash);
|
||||
if (seed == null) continue;
|
||||
if (seed == null) {
|
||||
continue;
|
||||
}
|
||||
// check if the peer is inside our cluster
|
||||
if ((this.sb.isRobinsonMode()) && (!this.sb.isInMyCluster(seed))) {
|
||||
seed = null;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (seed == null) return false;
|
||||
if (seed == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// we know a peer which should provide remote crawl entries. load them now.
|
||||
final RSSFeed feed = Protocol.queryRemoteCrawlURLs(this.sb.peers, seed, 60, 8000);
|
||||
|
@ -467,7 +516,9 @@ public class CrawlQueues {
|
|||
final String urlRejectReason = this.sb.crawlStacker.urlInAcceptedDomain(url);
|
||||
if (urlRejectReason == null) {
|
||||
// stack url
|
||||
if (this.sb.getLog().isFinest()) this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
|
||||
if (this.sb.getLog().isFinest()) {
|
||||
this.sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
|
||||
}
|
||||
this.sb.crawlStacker.enqueueEntry(new Request(
|
||||
ASCII.getBytes(hash),
|
||||
url,
|
||||
|
@ -514,12 +565,16 @@ public class CrawlQueues {
|
|||
// or there is no global crawl on the stack
|
||||
final String queueCheck = loadIsPossible(NoticedURL.StackType.REMOTE);
|
||||
if (queueCheck != null) {
|
||||
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: " + queueCheck);
|
||||
if (this.log.isFinest()) {
|
||||
this.log.logFinest("omitting de-queue/remote: " + queueCheck);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isPaused(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
|
||||
if (this.log.isFinest()) this.log.logFinest("omitting de-queue/remote: paused");
|
||||
if (this.log.isFinest()) {
|
||||
this.log.logFinest("omitting de-queue/remote: paused");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -536,7 +591,9 @@ public class CrawlQueues {
|
|||
return true;
|
||||
} catch (final IOException e) {
|
||||
this.log.logSevere(stats + ": CANNOT FETCH ENTRY: " + e.getMessage(), e);
|
||||
if (e.getMessage().indexOf("hash is null",0) > 0) this.noticeURL.clear(NoticedURL.StackType.REMOTE);
|
||||
if (e.getMessage().indexOf("hash is null",0) > 0) {
|
||||
this.noticeURL.clear(NoticedURL.StackType.REMOTE);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -603,7 +660,9 @@ public class CrawlQueues {
|
|||
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), true);
|
||||
if (response == null) {
|
||||
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
|
||||
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
|
||||
if (CrawlQueues.this.log.isFine()) {
|
||||
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": no content (possibly caused by cache policy)");
|
||||
}
|
||||
result = "no content (possibly caused by cache policy)";
|
||||
} else {
|
||||
this.request.setStatus("loaded", WorkflowJob.STATUS_RUNNING);
|
||||
|
@ -613,7 +672,9 @@ public class CrawlQueues {
|
|||
}
|
||||
} catch (final IOException e) {
|
||||
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
|
||||
if (CrawlQueues.this.log.isFine()) CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
|
||||
if (CrawlQueues.this.log.isFine()) {
|
||||
CrawlQueues.this.log.logFine("problem loading " + this.request.url().toString() + ": " + e.getMessage());
|
||||
}
|
||||
result = "load error - " + e.getMessage();
|
||||
}
|
||||
|
||||
|
@ -646,7 +707,5 @@ public class CrawlQueues {
|
|||
assert w != null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
0
source/de/anomic/crawler/NoticedURL.java
Executable file → Normal file
0
source/de/anomic/crawler/NoticedURL.java
Executable file → Normal file
0
source/de/anomic/crawler/ResultImages.java
Executable file → Normal file
0
source/de/anomic/crawler/ResultImages.java
Executable file → Normal file
0
source/de/anomic/crawler/ZURL.java
Executable file → Normal file
0
source/de/anomic/crawler/ZURL.java
Executable file → Normal file
0
source/de/anomic/crawler/retrieval/Request.java
Executable file → Normal file
0
source/de/anomic/crawler/retrieval/Request.java
Executable file → Normal file
0
source/de/anomic/crawler/retrieval/Response.java
Executable file → Normal file
0
source/de/anomic/crawler/retrieval/Response.java
Executable file → Normal file
|
@ -48,12 +48,15 @@ public class DidYouMean {
|
|||
private static final char[] ALPHABET_KANJI = new char[512];
|
||||
static {
|
||||
// this is very experimental: a very small subset of Kanji
|
||||
for (char a = '\u3400'; a <= '\u34ff'; a++) ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
|
||||
for (char a = '\u4e00'; a <= '\u4eff'; a++) ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a;
|
||||
for (char a = '\u3400'; a <= '\u34ff'; a++) {
|
||||
ALPHABET_KANJI[0xff & (a - '\u3400')] = a;
|
||||
}
|
||||
for (char a = '\u4e00'; a <= '\u4eff'; a++) {
|
||||
ALPHABET_KANJI[0xff & (a - '\u4e00') + 256] = a;
|
||||
}
|
||||
}
|
||||
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
|
||||
private static char[] alphabet = ALPHABET_LATIN;
|
||||
|
||||
private static final char[][] ALPHABETS = {ALPHABET_LATIN, ALPHABET_KANJI};
|
||||
private static final StringBuilder POISON_STRING = new StringBuilder("\n");
|
||||
public static final int AVAILABLE_CPU = Runtime.getRuntime().availableProcessors();
|
||||
private static final wordLengthComparator WORD_LENGTH_COMPARATOR = new wordLengthComparator();
|
||||
|
@ -66,6 +69,7 @@ public class DidYouMean {
|
|||
private boolean createGen; // keeps the value 'true' as long as no entry in guessLib is written
|
||||
private final SortedSet<StringBuilder> resultSet;
|
||||
private final indexSizeComparator INDEX_SIZE_COMPARATOR;
|
||||
private char[] alphabet;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -88,25 +92,31 @@ public class DidYouMean {
|
|||
boolean alphafound = false;
|
||||
alphatest: for (final char[] alpha: ALPHABETS) {
|
||||
if (isAlphabet(alpha, testchar)) {
|
||||
alphabet = alpha;
|
||||
this.alphabet = new char[alpha.length];
|
||||
System.arraycopy(ALPHABET_LATIN, 0, this.alphabet, 0, alpha.length);
|
||||
alphafound = true;
|
||||
break alphatest;
|
||||
}
|
||||
}
|
||||
if (!alphafound) {
|
||||
// generate generic alphabet using simply a character block of 256 characters
|
||||
final char firstchar = (char) ((0xff & (testchar / 256)) * 256);
|
||||
final char lastchar = (char) (firstchar + 255);
|
||||
alphabet = new char[256];
|
||||
for (char a = firstchar; a <= lastchar; a++) {
|
||||
alphabet[0xff & (a - firstchar)] = a;
|
||||
final int firstchar = (0xff & (testchar / 256)) * 256;
|
||||
final int lastchar = firstchar + 255;
|
||||
this.alphabet = new char[256];
|
||||
// test this with /suggest.json?q=%EF%BD%84
|
||||
for (int a = firstchar; a <= lastchar; a++) {
|
||||
this.alphabet[0xff & (a - firstchar)] = (char) a;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final boolean isAlphabet(final char[] alpha, final char testchar) {
|
||||
for (final char a: alpha) if (a == testchar) return true;
|
||||
for (final char a: alpha) {
|
||||
if (a == testchar) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -125,10 +135,15 @@ public class DidYouMean {
|
|||
* @return
|
||||
*/
|
||||
public SortedSet<StringBuilder> getSuggestions(final long timeout, final int preSortSelection) {
|
||||
if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short
|
||||
if (this.word.length() < MinimumInputWordLength)
|
||||
{
|
||||
return this.resultSet; // return nothing if input is too short
|
||||
}
|
||||
final long startTime = System.currentTimeMillis();
|
||||
final long timelimit = startTime + timeout;
|
||||
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
|
||||
if (StringBuilderComparator.CASE_INSENSITIVE_ORDER.indexOf(this.word, ' ') > 0) {
|
||||
return getSuggestions(StringBuilderComparator.CASE_INSENSITIVE_ORDER.split(this.word, ' '), timeout, preSortSelection, this.index);
|
||||
}
|
||||
final SortedSet<StringBuilder> preSorted = getSuggestions(timeout);
|
||||
if (System.currentTimeMillis() > timelimit) {
|
||||
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
|
||||
|
@ -138,8 +153,12 @@ public class DidYouMean {
|
|||
|
||||
final ReversibleScoreMap<StringBuilder> scored = new ClusteredScoreMap<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
|
||||
for (final StringBuilder s: preSorted) {
|
||||
if (System.currentTimeMillis() > timelimit) break;
|
||||
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
|
||||
if (System.currentTimeMillis() > timelimit) {
|
||||
break;
|
||||
}
|
||||
if (!(scored.sizeSmaller(2 * preSortSelection))) {
|
||||
break;
|
||||
}
|
||||
scored.inc(s, this.index.count(Word.word2hash(s)));
|
||||
}
|
||||
final SortedSet<StringBuilder> countSorted = Collections.synchronizedSortedSet(new TreeSet<StringBuilder>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
|
||||
|
@ -147,8 +166,12 @@ public class DidYouMean {
|
|||
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
|
||||
final StringBuilder s = scored.getMaxKey();
|
||||
final int score = scored.delete(s);
|
||||
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);
|
||||
if (System.currentTimeMillis() > timelimit) break;
|
||||
if (s.length() >= MinimumOutputWordLength && score > wc) {
|
||||
countSorted.add(s);
|
||||
}
|
||||
if (System.currentTimeMillis() > timelimit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// finished
|
||||
|
@ -180,11 +203,19 @@ public class DidYouMean {
|
|||
final SortedSet<StringBuilder> result = new TreeSet<StringBuilder>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
|
||||
StringBuilder sb;
|
||||
for (int i = 0; i < words.length; i++) {
|
||||
if (s[i].isEmpty()) continue;
|
||||
if (s[i].isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
sb = new StringBuilder(20);
|
||||
for (int j = 0; j < words.length; j++) {
|
||||
if (j > 0) sb.append(' ');
|
||||
if (i == j) sb.append(s[j].first()); else sb.append(words[j]);
|
||||
if (j > 0) {
|
||||
sb.append(' ');
|
||||
}
|
||||
if (i == j) {
|
||||
sb.append(s[j].first());
|
||||
} else {
|
||||
sb.append(words[j]);
|
||||
}
|
||||
}
|
||||
result.add(sb);
|
||||
}
|
||||
|
@ -211,11 +242,13 @@ public class DidYouMean {
|
|||
// get a single recommendation for the word without altering the word
|
||||
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(this.word);
|
||||
for (final StringBuilder t: libr) {
|
||||
if (!t.equals(this.word)) try {
|
||||
if (!t.equals(this.word)) {
|
||||
try {
|
||||
this.createGen = false;
|
||||
this.guessLib.put(t);
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
// create and start producers
|
||||
// the CPU load to create the guessed words is very low, but the testing
|
||||
|
@ -226,34 +259,46 @@ public class DidYouMean {
|
|||
producers[1] = new AddingOneLetter();
|
||||
producers[2] = new DeletingOneLetter();
|
||||
producers[3] = new ReversingTwoConsecutiveLetters();
|
||||
for (final Thread t: producers) t.start();
|
||||
for (final Thread t: producers) {
|
||||
t.start();
|
||||
}
|
||||
|
||||
// start more consumers if there are more cores
|
||||
if (consumers.length > 1) for (int i = 1; i < consumers.length; i++) {
|
||||
if (consumers.length > 1) {
|
||||
for (int i = 1; i < consumers.length; i++) {
|
||||
consumers[i] = new Consumer();
|
||||
consumers[i].start();
|
||||
}
|
||||
}
|
||||
|
||||
// now decide which kind of guess is better
|
||||
// we take guessLib entries as long as there is any entry in it
|
||||
// to see if this is the case, we must wait for termination of the producer
|
||||
for (final Thread t: producers) try { t.join(); } catch (final InterruptedException e) {}
|
||||
for (final Thread t: producers) {
|
||||
try { t.join(); } catch (final InterruptedException e) {}
|
||||
}
|
||||
|
||||
// if there is not any entry in guessLib, then transfer all entries from the
|
||||
// guessGen to guessLib
|
||||
if (this.createGen) try {
|
||||
if (this.createGen) {
|
||||
try {
|
||||
this.guessGen.put(POISON_STRING);
|
||||
StringBuilder s;
|
||||
while (!(s = this.guessGen.take()).equals(POISON_STRING)) this.guessLib.put(s);
|
||||
while (!(s = this.guessGen.take()).equals(POISON_STRING)) {
|
||||
this.guessLib.put(s);
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
|
||||
// put poison into guessLib to terminate consumers
|
||||
for (@SuppressWarnings("unused") final Consumer c: consumers)
|
||||
for (@SuppressWarnings("unused") final Consumer c: consumers) {
|
||||
try { this.guessLib.put(POISON_STRING); } catch (final InterruptedException e) {}
|
||||
}
|
||||
|
||||
// wait for termination of consumer
|
||||
for (final Consumer c: consumers)
|
||||
for (final Consumer c: consumers) {
|
||||
try { c.join(); } catch (final InterruptedException e) {}
|
||||
}
|
||||
|
||||
// we don't want the given word in the result
|
||||
this.resultSet.remove(this.word);
|
||||
|
@ -265,7 +310,9 @@ public class DidYouMean {
|
|||
private void test(final StringBuilder s) throws InterruptedException {
|
||||
final Set<StringBuilder> libr = LibraryProvider.dymLib.recommend(s);
|
||||
libr.addAll(LibraryProvider.geoLoc.recommend(s));
|
||||
if (!libr.isEmpty()) this.createGen = false;
|
||||
if (!libr.isEmpty()) {
|
||||
this.createGen = false;
|
||||
}
|
||||
for (final StringBuilder t: libr) {
|
||||
this.guessLib.put(t);
|
||||
}
|
||||
|
@ -284,18 +331,22 @@ public class DidYouMean {
|
|||
@Override
|
||||
public void run() {
|
||||
char m;
|
||||
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
|
||||
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
|
||||
try {
|
||||
m = DidYouMean.this.word.charAt(i);
|
||||
for (final char c: alphabet) {
|
||||
for (final char c: DidYouMean.this.alphabet) {
|
||||
if (m != c) {
|
||||
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i + 1));
|
||||
test(ts);
|
||||
}
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DidYouMean's producer thread that deletes extra letters (e.g. frog/fog) for a given term
|
||||
|
@ -306,12 +357,16 @@ public class DidYouMean {
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
for (int i = 0; i < DidYouMean.this.wordLen; i++) try {
|
||||
for (int i = 0; i < DidYouMean.this.wordLen; i++) {
|
||||
try {
|
||||
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.substring(i + 1));
|
||||
test(ts);
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
||||
return;
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -324,15 +379,19 @@ public class DidYouMean {
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
for (int i = 0; i <= DidYouMean.this.wordLen; i++) try {
|
||||
for (final char c: alphabet) {
|
||||
for (int i = 0; i <= DidYouMean.this.wordLen; i++) {
|
||||
try {
|
||||
for (final char c: DidYouMean.this.alphabet) {
|
||||
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(c).append(DidYouMean.this.word.substring(i));
|
||||
test(ts);
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* DidYouMean's producer thread that reverses any two consecutive letters (e.g. two/tow) for a given term
|
||||
|
@ -343,12 +402,16 @@ public class DidYouMean {
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) try {
|
||||
for (int i = 0; i < DidYouMean.this.wordLen - 1; i++) {
|
||||
try {
|
||||
final StringBuilder ts = new StringBuilder(DidYouMean.this.word.length() + 1).append(DidYouMean.this.word.substring(0, i)).append(DidYouMean.this.word.charAt(i + 1)).append(DidYouMean.this.word.charAt(i)).append(DidYouMean.this.word.substring(i + 2));
|
||||
test(ts);
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
||||
return;
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -364,8 +427,12 @@ public class DidYouMean {
|
|||
StringBuilder s;
|
||||
try {
|
||||
while ((s = DidYouMean.this.guessLib.take()) != POISON_STRING) {
|
||||
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) DidYouMean.this.resultSet.add(s);
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) return;
|
||||
if (s.length() >= MinimumOutputWordLength && DidYouMean.this.index.has(Word.word2hash(s))) {
|
||||
DidYouMean.this.resultSet.add(s);
|
||||
}
|
||||
if (System.currentTimeMillis() > DidYouMean.this.timeLimit) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} catch (final InterruptedException e) {}
|
||||
}
|
||||
|
@ -377,10 +444,13 @@ public class DidYouMean {
|
|||
*/
|
||||
private class indexSizeComparator implements Comparator<StringBuilder> {
|
||||
|
||||
@Override
|
||||
public int compare(final StringBuilder o1, final StringBuilder o2) {
|
||||
final int i1 = DidYouMean.this.index.count(Word.word2hash(o1));
|
||||
final int i2 = DidYouMean.this.index.count(Word.word2hash(o2));
|
||||
if (i1 == i2) return WORD_LENGTH_COMPARATOR.compare(o1, o2);
|
||||
if (i1 == i2) {
|
||||
return WORD_LENGTH_COMPARATOR.compare(o1, o2);
|
||||
}
|
||||
return (i1 < i2) ? 1 : -1; // '<' is correct, because the largest count shall be ordered to be the first position in the result
|
||||
}
|
||||
}
|
||||
|
@ -391,10 +461,13 @@ public class DidYouMean {
|
|||
*/
|
||||
private static class wordLengthComparator implements Comparator<StringBuilder> {
|
||||
|
||||
@Override
|
||||
public int compare(final StringBuilder o1, final StringBuilder o2) {
|
||||
final int i1 = o1.length();
|
||||
final int i2 = o2.length();
|
||||
if (i1 == i2) return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
|
||||
if (i1 == i2) {
|
||||
return StringBuilderComparator.CASE_INSENSITIVE_ORDER.compare(o1, o2);
|
||||
}
|
||||
return (i1 < i2) ? 1 : -1; // '<' is correct, because the longest word shall be first
|
||||
}
|
||||
|
||||
|
@ -411,10 +484,13 @@ public class DidYouMean {
|
|||
this.secondaryComparator = secondaryComparator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(final StringBuilder o1, final StringBuilder o2) {
|
||||
final boolean o1m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o1, this.head);
|
||||
final boolean o2m = StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(o2, this.head);
|
||||
if ((o1m && o2m) || (!o1m && !o2m)) return this.secondaryComparator.compare(o1, o2);
|
||||
if ((o1m && o2m) || (!o1m && !o2m)) {
|
||||
return this.secondaryComparator.compare(o1, o2);
|
||||
}
|
||||
return o1m ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
|
0
source/net/yacy/ai/example/ConnectFour.java
Executable file → Normal file
0
source/net/yacy/ai/example/ConnectFour.java
Executable file → Normal file
0
source/net/yacy/ai/example/testorder.java
Executable file → Normal file
0
source/net/yacy/ai/example/testorder.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/AbstractFinding.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/AbstractFinding.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/AbstractModel.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/AbstractModel.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Agent.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Agent.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Asset.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Asset.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Attempts.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Attempts.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Battle.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Battle.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Challenge.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Challenge.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Context.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Context.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Engine.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Engine.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Finding.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Finding.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Goal.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Goal.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Model.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Model.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Role.java
Executable file → Normal file
0
source/net/yacy/ai/greedy/Role.java
Executable file → Normal file
0
source/net/yacy/cora/protocol/RequestHeader.java
Executable file → Normal file
0
source/net/yacy/cora/protocol/RequestHeader.java
Executable file → Normal file
0
source/net/yacy/cora/protocol/ResponseHeader.java
Executable file → Normal file
0
source/net/yacy/cora/protocol/ResponseHeader.java
Executable file → Normal file
42
source/net/yacy/kelondro/blob/ArrayStack.java
Executable file → Normal file
42
source/net/yacy/kelondro/blob/ArrayStack.java
Executable file → Normal file
|
@ -215,12 +215,14 @@ public class ArrayStack implements BLOB {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long mem() {
|
||||
long m = 0;
|
||||
if (this.blobs != null) for (final blobItem b: this.blobs) m += b.blob.mem();
|
||||
return m;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void trim() {
|
||||
// trim shall not be called for ArrayStacks because the characteristics of an ArrayStack is that the 'topmost' BLOB on the stack
|
||||
// is used for write operations and all other shall be trimmed automatically since they are not used for writing. And the
|
||||
|
@ -374,6 +376,7 @@ public class ArrayStack implements BLOB {
|
|||
return new File(this.heapLocation, this.prefix + "." + my_SHORT_MILSEC_FORMATTER.format(creation) + ".blob");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return this.heapLocation.getName();
|
||||
}
|
||||
|
@ -414,12 +417,14 @@ public class ArrayStack implements BLOB {
|
|||
/*
|
||||
* return the size of the repository (in bytes)
|
||||
*/
|
||||
@Override
|
||||
public synchronized long length() {
|
||||
long s = 0;
|
||||
for (int i = 0; i < this.blobs.size(); i++) s += this.blobs.get(i).location.length();
|
||||
return s;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteOrder ordering() {
|
||||
return this.ordering;
|
||||
}
|
||||
|
@ -446,6 +451,7 @@ public class ArrayStack implements BLOB {
|
|||
* ask for the length of the primary key
|
||||
* @return the length of the key
|
||||
*/
|
||||
@Override
|
||||
public int keylength() {
|
||||
return this.keylength;
|
||||
}
|
||||
|
@ -454,6 +460,7 @@ public class ArrayStack implements BLOB {
|
|||
* clears the content of the database
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized void clear() throws IOException {
|
||||
for (final blobItem bi: this.blobs) {
|
||||
bi.blob.clear();
|
||||
|
@ -467,12 +474,14 @@ public class ArrayStack implements BLOB {
|
|||
* ask for the number of blob entries
|
||||
* @return the number of entries in the table
|
||||
*/
|
||||
@Override
|
||||
public synchronized int size() {
|
||||
int s = 0;
|
||||
for (final blobItem bi: this.blobs) s += bi.blob.size();
|
||||
return s;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean isEmpty() {
|
||||
for (final blobItem bi: this.blobs) if (!bi.blob.isEmpty()) return false;
|
||||
return true;
|
||||
|
@ -497,6 +506,7 @@ public class ArrayStack implements BLOB {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
|
||||
assert rotating == false;
|
||||
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
|
||||
|
@ -514,6 +524,7 @@ public class ArrayStack implements BLOB {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
|
||||
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(this.blobs.size());
|
||||
final Iterator<blobItem> i = this.blobs.iterator();
|
||||
|
@ -529,6 +540,7 @@ public class ArrayStack implements BLOB {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized boolean containsKey(final byte[] key) {
|
||||
final blobItem bi = keeperOf(key);
|
||||
return bi != null;
|
||||
|
@ -550,14 +562,27 @@ public class ArrayStack implements BLOB {
|
|||
return null;
|
||||
}
|
||||
|
||||
// first check the current blob only because that has most probably the key if any has that key
|
||||
int bs1 = this.blobs.size() - 1;
|
||||
blobItem bi = this.blobs.get(bs1);
|
||||
if (bi.blob.containsKey(key)) return bi;
|
||||
if (this.blobs.size() == 2) {
|
||||
// this should not be done concurrently
|
||||
bi = this.blobs.get(0);
|
||||
if (bi.blob.containsKey(key)) return bi;
|
||||
return null;
|
||||
}
|
||||
|
||||
// start a concurrent query to database tables
|
||||
final CompletionService<blobItem> cs = new ExecutorCompletionService<blobItem>(this.executor);
|
||||
int accepted = 0;
|
||||
for (final blobItem bi : this.blobs) {
|
||||
for (int i = 0; i < bs1; i++) {
|
||||
final blobItem b = this.blobs.get(i);
|
||||
try {
|
||||
cs.submit(new Callable<blobItem>() {
|
||||
@Override
|
||||
public blobItem call() {
|
||||
if (bi.blob.containsKey(key)) return bi;
|
||||
if (b.blob.containsKey(key)) return b;
|
||||
return null;
|
||||
}
|
||||
});
|
||||
|
@ -565,7 +590,7 @@ public class ArrayStack implements BLOB {
|
|||
} catch (final RejectedExecutionException e) {
|
||||
// the executor is either shutting down or the blocking queue is full
|
||||
// execute the search direct here without concurrency
|
||||
if (bi.blob.containsKey(key)) return bi;
|
||||
if (b.blob.containsKey(key)) return b;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -599,6 +624,7 @@ public class ArrayStack implements BLOB {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public byte[] get(final byte[] key) throws IOException, RowSpaceExceededException {
|
||||
if (this.blobs.size() == 0) return null;
|
||||
if (this.blobs.size() == 1) {
|
||||
|
@ -619,6 +645,7 @@ public class ArrayStack implements BLOB {
|
|||
*/
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] get(final Object key) {
|
||||
if (!(key instanceof byte[])) return null;
|
||||
try {
|
||||
|
@ -652,6 +679,7 @@ public class ArrayStack implements BLOB {
|
|||
this.key = key;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected byte[] next0() {
|
||||
while (this.bii.hasNext()) {
|
||||
final BLOB b = this.bii.next().blob;
|
||||
|
@ -677,6 +705,7 @@ public class ArrayStack implements BLOB {
|
|||
* @return the size of the BLOB or -1 if the BLOB does not exist
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized long length(final byte[] key) throws IOException {
|
||||
long l;
|
||||
for (final blobItem bi: this.blobs) {
|
||||
|
@ -707,6 +736,7 @@ public class ArrayStack implements BLOB {
|
|||
this.key = key;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Long next0() {
|
||||
while (this.bii.hasNext()) {
|
||||
final BLOB b = this.bii.next().blob;
|
||||
|
@ -744,6 +774,7 @@ public class ArrayStack implements BLOB {
|
|||
* @throws IOException
|
||||
* @throws RowSpaceExceededException
|
||||
*/
|
||||
@Override
|
||||
public synchronized void insert(final byte[] key, final byte[] b) throws IOException {
|
||||
blobItem bi = (this.blobs.isEmpty()) ? null : this.blobs.get(this.blobs.size() - 1);
|
||||
/*
|
||||
|
@ -770,6 +801,7 @@ public class ArrayStack implements BLOB {
|
|||
* @throws IOException
|
||||
* @throws RowSpaceExceededException
|
||||
*/
|
||||
@Override
|
||||
public synchronized int replace(final byte[] key, final Rewriter rewriter) throws IOException, RowSpaceExceededException {
|
||||
int d = 0;
|
||||
for (final blobItem bi: this.blobs) {
|
||||
|
@ -784,6 +816,7 @@ public class ArrayStack implements BLOB {
|
|||
* @throws IOException
|
||||
* @throws RowSpaceExceededException
|
||||
*/
|
||||
@Override
|
||||
public synchronized int reduce(final byte[] key, final Reducer reduce) throws IOException, RowSpaceExceededException {
|
||||
int d = 0;
|
||||
for (final blobItem bi: this.blobs) {
|
||||
|
@ -797,6 +830,7 @@ public class ArrayStack implements BLOB {
|
|||
* @param key the primary key
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public synchronized void delete(final byte[] key) throws IOException {
|
||||
final long m = mem();
|
||||
if (this.blobs.size() == 0) {
|
||||
|
@ -812,6 +846,7 @@ public class ArrayStack implements BLOB {
|
|||
// run this in a concurrent thread
|
||||
final blobItem bi0 = bi;
|
||||
t[i] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
try { bi0.blob.delete(key); } catch (final IOException e) {}
|
||||
}
|
||||
|
@ -831,6 +866,7 @@ public class ArrayStack implements BLOB {
|
|||
/**
|
||||
* close the BLOB
|
||||
*/
|
||||
@Override
|
||||
public synchronized void close(final boolean writeIDX) {
|
||||
for (final blobItem bi: this.blobs) bi.blob.close(writeIDX);
|
||||
this.blobs.clear();
|
||||
|
|
0
source/net/yacy/kelondro/blob/Heap.java
Executable file → Normal file
0
source/net/yacy/kelondro/blob/Heap.java
Executable file → Normal file
540
source/net/yacy/kelondro/data/meta/URIMetadataNode.java
Normal file
540
source/net/yacy/kelondro/data/meta/URIMetadataNode.java
Normal file
|
@ -0,0 +1,540 @@
|
|||
package net.yacy.kelondro.data.meta;
|
||||
|
||||
import net.yacy.cora.lod.Node;
|
||||
import net.yacy.cora.lod.vocabulary.Rdf;
|
||||
import net.yacy.kelondro.data.word.WordReferenceVars;
|
||||
|
||||
|
||||
public class URIMetadataNode /*implements URIMetadata*/ {
|
||||
|
||||
private final Node entry;
|
||||
private final String snippet;
|
||||
private final WordReferenceVars word; // this is only used if the url is transported via remote search requests
|
||||
private final long ranking; // during generation of a search result this value is set
|
||||
|
||||
/**
 * Creates a dummy entry, good to produce poison objects.
 * The node is a bare {@code Rdf.Description} node; snippet and word are
 * {@code null} and the ranking is 0.
 */
public URIMetadataNode() {
    this.snippet = null;
    this.word = null;
    this.ranking = 0;
    this.entry = new Node(Rdf.Description);
}
|
||||
/*
|
||||
public URIMetadataNode(
|
||||
final DigestURI url,
|
||||
final String dc_title,
|
||||
final String dc_creator,
|
||||
final String dc_subject,
|
||||
final String dc_publisher,
|
||||
final float lon, final float lat, // decimal degrees as in WGS84; if unknown both values may be 0.0f;
|
||||
final Date mod,
|
||||
final Date load,
|
||||
final Date fresh,
|
||||
final String referrer,
|
||||
final byte[] md5,
|
||||
final long size,
|
||||
final int wc,
|
||||
final char dt,
|
||||
final Bitfield flags,
|
||||
final byte[] lang,
|
||||
final int llocal,
|
||||
final int lother,
|
||||
final int laudio,
|
||||
final int limage,
|
||||
final int lvideo,
|
||||
final int lapp) {
|
||||
// create new entry
|
||||
this.entry = new Node();
|
||||
this.entry.setSubject(UTF8.getBytes(url.toNormalform(true, false)));
|
||||
this.entry.setObject(YaCyMetadata.hash, url.hash());
|
||||
this.entry.setObject(DublinCore.Title, UTF8.getBytes(dc_title));
|
||||
this.entry.setObject(DublinCore.Creator, UTF8.getBytes(dc_creator));
|
||||
this.entry.setObject(DublinCore.Subject, UTF8.getBytes(dc_subject));
|
||||
this.entry.setObject(DublinCore.Publisher, UTF8.getBytes(dc_publisher));
|
||||
this.entry.setObject(Geo.Lat, ASCII.getBytes(Float.toString(lat)));
|
||||
this.entry.setObject(Geo.Long, ASCII.getBytes(Float.toString(lon)));
|
||||
|
||||
|
||||
encodeDate(col_mod, mod);
|
||||
encodeDate(col_load, load);
|
||||
encodeDate(col_fresh, fresh);
|
||||
this.entry.setCol(col_referrer, (referrer == null) ? null : UTF8.getBytes(referrer));
|
||||
this.entry.setCol(col_md5, md5);
|
||||
this.entry.setCol(col_size, size);
|
||||
this.entry.setCol(col_wc, wc);
|
||||
this.entry.setCol(col_dt, new byte[]{(byte) dt});
|
||||
this.entry.setCol(col_flags, flags.bytes());
|
||||
this.entry.setCol(col_lang, lang);
|
||||
this.entry.setCol(col_llocal, llocal);
|
||||
this.entry.setCol(col_lother, lother);
|
||||
this.entry.setCol(col_limage, limage);
|
||||
this.entry.setCol(col_laudio, laudio);
|
||||
this.entry.setCol(col_lvideo, lvideo);
|
||||
this.entry.setCol(col_lapp, lapp);
|
||||
//System.out.println("===DEBUG=== " + load.toString() + ", " + decodeDate(col_load).toString());
|
||||
this.snippet = null;
|
||||
this.word = null;
|
||||
this.ranking = 0;
|
||||
this.comp = null;
|
||||
}
|
||||
|
||||
private byte[] encodeDate(final Date d) {
|
||||
// calculates the number of days since 1.1.1970 and returns this as 4-byte array
|
||||
// 86400000 is the number of milliseconds in one day
|
||||
return NaturalOrder.encodeLong(d.getTime() / 86400000L, 4);
|
||||
}
|
||||
|
||||
private Date decodeDate(final int col) {
|
||||
final long t = this.entry.getColLong(col);
|
||||
}
|
||||
|
||||
public static byte[] encodeComp(
|
||||
final DigestURI url,
|
||||
final String dc_title,
|
||||
final String dc_creator,
|
||||
final String dc_subject,
|
||||
final String dc_publisher,
|
||||
final float lat,
|
||||
final float lon) {
|
||||
final CharBuffer s = new CharBuffer(360);
|
||||
s.append(url.toNormalform(false, true)).appendLF();
|
||||
s.append(dc_title).appendLF();
|
||||
if (dc_creator.length() > 80) s.append(dc_creator, 0, 80); else s.append(dc_creator);
|
||||
s.appendLF();
|
||||
if (dc_subject.length() > 120) s.append(dc_subject, 0, 120); else s.append(dc_subject);
|
||||
s.appendLF();
|
||||
if (dc_publisher.length() > 80) s.append(dc_publisher, 0, 80); else s.append(dc_publisher);
|
||||
s.appendLF();
|
||||
if (lon == 0.0f && lat == 0.0f) s.appendLF(); else s.append(Float.toString(lat)).append(',').append(Float.toString(lon)).appendLF();
|
||||
return UTF8.getBytes(s.toString());
|
||||
}
|
||||
|
||||
public URIMetadataRow(final Row.Entry entry, final WordReferenceVars searchedWord, final long ranking) {
|
||||
this.entry = entry;
|
||||
this.snippet = null;
|
||||
this.word = searchedWord;
|
||||
this.ranking = ranking;
|
||||
this.comp = null;
|
||||
}
|
||||
|
||||
public URIMetadataRow(final Properties prop) {
|
||||
// generates a plasmaLURLEntry using the properties from the argument
|
||||
// the property names must correspond to the one from toString
|
||||
//System.out.println("DEBUG-ENTRY: prop=" + prop.toString());
|
||||
DigestURI url;
|
||||
try {
|
||||
url = new DigestURI(crypt.simpleDecode(prop.getProperty("url", ""), null), ASCII.getBytes(prop.getProperty("hash")));
|
||||
} catch (final MalformedURLException e) {
|
||||
url = null;
|
||||
}
|
||||
String descr = crypt.simpleDecode(prop.getProperty("descr", ""), null); if (descr == null) descr = "";
|
||||
String dc_creator = crypt.simpleDecode(prop.getProperty("author", ""), null); if (dc_creator == null) dc_creator = "";
|
||||
String tags = crypt.simpleDecode(prop.getProperty("tags", ""), null); if (tags == null) tags = "";
|
||||
String dc_publisher = crypt.simpleDecode(prop.getProperty("publisher", ""), null); if (dc_publisher == null) dc_publisher = "";
|
||||
String lons = crypt.simpleDecode(prop.getProperty("lon", "0.0"), null); if (lons == null) lons = "0.0";
|
||||
String lats = crypt.simpleDecode(prop.getProperty("lat", "0.0"), null); if (lats == null) lats = "0.0";
|
||||
|
||||
this.entry = rowdef.newEntry();
|
||||
this.entry.setCol(col_hash, url.hash()); // FIXME potential null pointer access
|
||||
this.entry.setCol(col_comp, encodeComp(url, descr, dc_creator, tags, dc_publisher, Float.parseFloat(lats), Float.parseFloat(lons)));
|
||||
|
||||
// create new formatters to make concurrency possible
|
||||
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
||||
|
||||
try {
|
||||
encodeDate(col_mod, formatter.parse(prop.getProperty("mod", "20000101")));
|
||||
} catch (final ParseException e) {
|
||||
encodeDate(col_mod, new Date());
|
||||
}
|
||||
try {
|
||||
encodeDate(col_load, formatter.parse(prop.getProperty("load", "20000101")));
|
||||
} catch (final ParseException e) {
|
||||
encodeDate(col_load, new Date());
|
||||
}
|
||||
try {
|
||||
encodeDate(col_fresh, formatter.parse(prop.getProperty("fresh", "20000101")));
|
||||
} catch (final ParseException e) {
|
||||
encodeDate(col_fresh, new Date());
|
||||
}
|
||||
this.entry.setCol(col_referrer, UTF8.getBytes(prop.getProperty("referrer", "")));
|
||||
this.entry.setCol(col_md5, Digest.decodeHex(prop.getProperty("md5", "")));
|
||||
this.entry.setCol(col_size, Integer.parseInt(prop.getProperty("size", "0")));
|
||||
this.entry.setCol(col_wc, Integer.parseInt(prop.getProperty("wc", "0")));
|
||||
final String dt = prop.getProperty("dt", "t");
|
||||
this.entry.setCol(col_dt, dt.length() > 0 ? new byte[]{(byte) dt.charAt(0)} : new byte[]{(byte) 't'});
|
||||
final String flags = prop.getProperty("flags", "AAAAAA");
|
||||
this.entry.setCol(col_flags, (flags.length() > 6) ? QueryParams.empty_constraint.bytes() : (new Bitfield(4, flags)).bytes());
|
||||
this.entry.setCol(col_lang, UTF8.getBytes(prop.getProperty("lang", "uk")));
|
||||
this.entry.setCol(col_llocal, Integer.parseInt(prop.getProperty("llocal", "0")));
|
||||
this.entry.setCol(col_lother, Integer.parseInt(prop.getProperty("lother", "0")));
|
||||
this.entry.setCol(col_limage, Integer.parseInt(prop.getProperty("limage", "0")));
|
||||
this.entry.setCol(col_laudio, Integer.parseInt(prop.getProperty("laudio", "0")));
|
||||
this.entry.setCol(col_lvideo, Integer.parseInt(prop.getProperty("lvideo", "0")));
|
||||
this.entry.setCol(col_lapp, Integer.parseInt(prop.getProperty("lapp", "0")));
|
||||
this.snippet = crypt.simpleDecode(prop.getProperty("snippet", ""), null);
|
||||
this.word = null;
|
||||
if (prop.containsKey("word")) throw new kelondroException("old database structure is not supported");
|
||||
if (prop.containsKey("wi")) {
|
||||
this.word = new WordReferenceVars(new WordReferenceRow(Base64Order.enhancedCoder.decodeString(prop.getProperty("wi", ""))));
|
||||
}
|
||||
this.ranking = 0;
|
||||
this.comp = null;
|
||||
}
|
||||
|
||||
public static URIMetadataRow importEntry(final String propStr) {
|
||||
if (propStr == null || (propStr.length() > 0 && propStr.charAt(0) != '{') || !propStr.endsWith("}")) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return new URIMetadataRow(MapTools.s2p(propStr.substring(1, propStr.length() - 1)));
|
||||
} catch (final kelondroException e) {
|
||||
// wrong format
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private StringBuilder corePropList() {
|
||||
// generate a parseable string; this is a simple property-list
|
||||
final Components metadata = metadata();
|
||||
final StringBuilder s = new StringBuilder(300);
|
||||
if (metadata == null) return null;
|
||||
//System.out.println("author=" + comp.author());
|
||||
|
||||
// create new formatters to make concurrency possible
|
||||
final GenericFormatter formatter = new GenericFormatter(GenericFormatter.FORMAT_SHORT_DAY, GenericFormatter.time_minute);
|
||||
|
||||
try {
|
||||
s.append("hash=").append(ASCII.String(hash()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",url=").append(crypt.simpleEncode(metadata.url().toNormalform(false, true)));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",descr=").append(crypt.simpleEncode(metadata.dc_title()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",author=").append(crypt.simpleEncode(metadata.dc_creator()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",tags=").append(crypt.simpleEncode(metadata.dc_subject()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",publisher=").append(crypt.simpleEncode(metadata.dc_publisher()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lat=").append(metadata.lat());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lon=").append(metadata.lon());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",mod=").append(formatter.format(moddate()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",load=").append(formatter.format(loaddate()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",fresh=").append(formatter.format(freshdate()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",referrer=").append(referrerHash() == null ? "" : ASCII.String(referrerHash()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",md5=").append(md5());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",size=").append(size());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",wc=").append(wordCount());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",dt=").append(doctype());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",flags=").append(flags().exportB64());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lang=").append(language() == null ? "EN" : UTF8.String(language()));
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",llocal=").append(llocal());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lother=").append(lother());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",limage=").append(limage());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",laudio=").append(laudio());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lvideo=").append(lvideo());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
s.append(",lapp=").append(lapp());
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
|
||||
if (this.word != null) {
|
||||
// append also word properties
|
||||
final String wprop = this.word.toPropertyForm();
|
||||
s.append(",wi=").append(Base64Order.enhancedCoder.encodeString(wprop));
|
||||
}
|
||||
assert (s.toString().indexOf(0) < 0);
|
||||
return s;
|
||||
|
||||
} catch (final Throwable e) {
|
||||
// serverLog.logFailure("plasmaLURL.corePropList", e.getMessage());
|
||||
// if (moddate == null) serverLog.logFailure("plasmaLURL.corePropList", "moddate=null");
|
||||
// if (loaddate == null) serverLog.logFailure("plasmaLURL.corePropList", "loaddate=null");
|
||||
Log.logException(e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Row.Entry toRowEntry() {
|
||||
return this.entry;
|
||||
}
|
||||
|
||||
public byte[] hash() {
|
||||
// return a url-hash, based on the md5 algorithm
|
||||
// the result is a String of 12 bytes within a 72-bit space
|
||||
// (each byte has an 6-bit range)
|
||||
// that should be enough for all web pages on the world
|
||||
return this.entry.getPrimaryKeyBytes();
|
||||
}
|
||||
|
||||
public long ranking() {
|
||||
return this.ranking;
|
||||
}
|
||||
|
||||
public boolean matches(final Pattern matcher) {
|
||||
return this.metadata().matches(matcher);
|
||||
}
|
||||
|
||||
public DigestURI url() {
|
||||
return this.metadata().url();
|
||||
}
|
||||
|
||||
public String dc_title() {
|
||||
return this.metadata().dc_title();
|
||||
}
|
||||
|
||||
public String dc_creator() {
|
||||
return this.metadata().dc_creator();
|
||||
}
|
||||
|
||||
public String dc_publisher() {
|
||||
return this.metadata().dc_publisher();
|
||||
}
|
||||
|
||||
public String dc_subject() {
|
||||
return this.metadata().dc_subject();
|
||||
}
|
||||
|
||||
public float lat() {
|
||||
return this.metadata().lat();
|
||||
}
|
||||
|
||||
public float lon() {
|
||||
return this.metadata().lon();
|
||||
}
|
||||
|
||||
private Components metadata() {
|
||||
// avoid double computation of metadata elements
|
||||
if (this.comp != null) return this.comp;
|
||||
// parse elements from comp field;
|
||||
final byte[] c = this.entry.getColBytes(col_comp, true);
|
||||
final List<byte[]> cl = ByteBuffer.split(c, (byte) 10);
|
||||
this.comp = new Components(
|
||||
(cl.size() > 0) ? UTF8.String(cl.get(0)) : "",
|
||||
hash(),
|
||||
(cl.size() > 1) ? UTF8.String(cl.get(1)) : "",
|
||||
(cl.size() > 2) ? UTF8.String(cl.get(2)) : "",
|
||||
(cl.size() > 3) ? UTF8.String(cl.get(3)) : "",
|
||||
(cl.size() > 4) ? UTF8.String(cl.get(4)) : "",
|
||||
(cl.size() > 5) ? UTF8.String(cl.get(5)) : "");
|
||||
return this.comp;
|
||||
}
|
||||
|
||||
public Date moddate() {
|
||||
return decodeDate(col_mod);
|
||||
}
|
||||
|
||||
public Date loaddate() {
|
||||
return decodeDate(col_load);
|
||||
}
|
||||
|
||||
public Date freshdate() {
|
||||
return decodeDate(col_fresh);
|
||||
}
|
||||
|
||||
public byte[] referrerHash() {
|
||||
// return the creator's hash or null if there is none
|
||||
// FIXME: There seem to be some malformed entries in the databases like "null\0\0\0\0\0\0\0\0"
|
||||
final byte[] r = this.entry.getColBytes(col_referrer, true);
|
||||
if (r != null) {
|
||||
int i = r.length;
|
||||
while (i > 0) {
|
||||
if (r[--i] == 0) return null;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
public String md5() {
|
||||
// returns the md5 in hex representation
|
||||
return Digest.encodeHex(this.entry.getColBytes(col_md5, true));
|
||||
}
|
||||
|
||||
public char doctype() {
|
||||
return (char) this.entry.getColByte(col_dt);
|
||||
}
|
||||
|
||||
public byte[] language() {
|
||||
byte[] b = this.entry.getColBytes(col_lang, true);
|
||||
if (b == null || b[0] == (byte)'[') {
|
||||
String tld = this.metadata().url.getTLD();
|
||||
if (tld.length() < 2 || tld.length() > 2) return ASCII.getBytes("en");
|
||||
return ASCII.getBytes(tld);
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return (int) this.entry.getColLong(col_size);
|
||||
}
|
||||
|
||||
public Bitfield flags() {
|
||||
return new Bitfield(this.entry.getColBytes(col_flags, true));
|
||||
}
|
||||
|
||||
public int wordCount() {
|
||||
return (int) this.entry.getColLong(col_wc);
|
||||
}
|
||||
|
||||
public int llocal() {
|
||||
return (int) this.entry.getColLong(col_llocal);
|
||||
}
|
||||
|
||||
public int lother() {
|
||||
return (int) this.entry.getColLong(col_lother);
|
||||
}
|
||||
|
||||
public int limage() {
|
||||
return (int) this.entry.getColLong(col_limage);
|
||||
}
|
||||
|
||||
public int laudio() {
|
||||
return (int) this.entry.getColLong(col_laudio);
|
||||
}
|
||||
|
||||
public int lvideo() {
|
||||
return (int) this.entry.getColLong(col_lvideo);
|
||||
}
|
||||
|
||||
public int lapp() {
|
||||
return (int) this.entry.getColLong(col_lapp);
|
||||
}
|
||||
|
||||
public String snippet() {
|
||||
// the snippet may appear here if the url was transported in a remote search
|
||||
// it will not be saved anywhere, but can only be requested here
|
||||
return this.snippet;
|
||||
}
|
||||
|
||||
public WordReferenceVars word() {
|
||||
return this.word;
|
||||
}
|
||||
|
||||
public boolean isOlder(final URIMetadata other) {
|
||||
if (other == null) return false;
|
||||
final Date tmoddate = moddate();
|
||||
final Date omoddate = other.moddate();
|
||||
if (tmoddate.before(omoddate)) return true;
|
||||
if (tmoddate.equals(omoddate)) {
|
||||
final Date tloaddate = loaddate();
|
||||
final Date oloaddate = other.loaddate();
|
||||
if (tloaddate.before(oloaddate)) return true;
|
||||
if (tloaddate.equals(oloaddate)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString(final String snippet) {
|
||||
// add information needed for remote transport
|
||||
final StringBuilder core = corePropList();
|
||||
if (core == null)
|
||||
return null;
|
||||
|
||||
core.ensureCapacity(core.length() + snippet.length() * 2);
|
||||
core.insert(0, "{");
|
||||
core.append(",snippet=").append(crypt.simpleEncode(snippet));
|
||||
core.append("}");
|
||||
|
||||
return core.toString();
|
||||
//return "{" + core + ",snippet=" + crypt.simpleEncode(snippet) + "}";
|
||||
}
|
||||
|
||||
public Request toBalancerEntry(final String initiatorHash) {
|
||||
return new Request(
|
||||
ASCII.getBytes(initiatorHash),
|
||||
metadata().url(),
|
||||
referrerHash(),
|
||||
metadata().dc_title(),
|
||||
moddate(),
|
||||
null,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder core = corePropList();
|
||||
if (core == null) return null;
|
||||
|
||||
core.insert(0, "{");
|
||||
core.append("}");
|
||||
|
||||
return core.toString();
|
||||
//return "{" + core + "}";
|
||||
}
|
||||
|
||||
private class Components {
|
||||
private DigestURI url;
|
||||
private String urlRaw;
|
||||
private byte[] urlHash;
|
||||
private final String dc_title, dc_creator, dc_subject, dc_publisher;
|
||||
private final String latlon; // a comma-separated tuple as "<latitude>,<longitude>" where the coordinates are given as WGS84 spatial coordinates in decimal degrees
|
||||
|
||||
public Components(
|
||||
final String urlRaw,
|
||||
final byte[] urlhash,
|
||||
final String title,
|
||||
final String author,
|
||||
final String tags,
|
||||
final String publisher,
|
||||
final String latlon) {
|
||||
this.url = null;
|
||||
this.urlRaw = urlRaw;
|
||||
this.urlHash = urlhash;
|
||||
this.dc_title = title;
|
||||
this.dc_creator = author;
|
||||
this.dc_subject = tags;
|
||||
this.dc_publisher = publisher;
|
||||
this.latlon = latlon;
|
||||
}
|
||||
public boolean matches(final Pattern matcher) {
|
||||
if (this.urlRaw != null) return matcher.matcher(this.urlRaw.toLowerCase()).matches();
|
||||
if (this.url != null) return matcher.matcher(this.url.toNormalform(true, true).toLowerCase()).matches();
|
||||
return false;
|
||||
}
|
||||
public DigestURI url() {
|
||||
if (this.url == null) {
|
||||
try {
|
||||
this.url = new DigestURI(this.urlRaw, this.urlHash);
|
||||
} catch (final MalformedURLException e) {
|
||||
this.url = null;
|
||||
}
|
||||
this.urlRaw = null;
|
||||
this.urlHash = null;
|
||||
}
|
||||
return this.url;
|
||||
}
|
||||
public String dc_title() { return this.dc_title; }
|
||||
public String dc_creator() { return this.dc_creator; }
|
||||
public String dc_publisher() { return this.dc_publisher; }
|
||||
public String dc_subject() { return this.dc_subject; }
|
||||
public float lat() {
|
||||
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
|
||||
final int p = this.latlon.indexOf(',');
|
||||
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(0, p));
|
||||
}
|
||||
public float lon() {
|
||||
if (this.latlon == null || this.latlon.length() == 0) return 0.0f;
|
||||
final int p = this.latlon.indexOf(',');
|
||||
return p < 0 ? 0.0f : Float.parseFloat(this.latlon.substring(p + 1));
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
|
@ -157,7 +157,16 @@ public final class Records {
|
|||
* @throws IOException
|
||||
*/
|
||||
private final long filesize() throws IOException {
|
||||
return raf.length() / recordsize;
|
||||
long records = 0;
|
||||
|
||||
try {
|
||||
records = raf.length() / recordsize;
|
||||
} catch (NullPointerException e) {
|
||||
// This may happen on shutdown while still something is moving on
|
||||
Log.logException(e);
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
0
source/net/yacy/kelondro/table/Relations.java
Executable file → Normal file
0
source/net/yacy/kelondro/table/Relations.java
Executable file → Normal file
0
source/net/yacy/kelondro/util/ISO639.java
Executable file → Normal file
0
source/net/yacy/kelondro/util/ISO639.java
Executable file → Normal file
|
@ -61,9 +61,11 @@ import net.yacy.peers.operation.yacySeedUploadFtp;
|
|||
import net.yacy.peers.operation.yacySeedUploadScp;
|
||||
import net.yacy.peers.operation.yacySeedUploader;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.search.SwitchboardConstants;
|
||||
import de.anomic.server.serverCore;
|
||||
|
||||
public class Network {
|
||||
public class Network
|
||||
{
|
||||
|
||||
// statics
|
||||
public static final ThreadGroup publishThreadGroup = new ThreadGroup("publishThreadGroup");
|
||||
|
@ -127,7 +129,9 @@ public class Network {
|
|||
}
|
||||
|
||||
public final void publishSeedList() {
|
||||
if (log.isFine()) log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("yacyCore.publishSeedList: Triggered Seed Publish");
|
||||
}
|
||||
|
||||
/*
|
||||
if (oldIPStamp.equals((String) seedDB.mySeed.get(yacySeed.IP, "127.0.0.1")))
|
||||
|
@ -138,24 +142,24 @@ public class Network {
|
|||
yacyCore.log.logDebug("***DEBUG publishSeedList: I can reach myself");
|
||||
*/
|
||||
|
||||
if ((this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP())) &&
|
||||
(this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected()) &&
|
||||
(canReachMyself()) &&
|
||||
(System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24) &&
|
||||
(this.sb.peers.mySeed().isPrincipal())
|
||||
) {
|
||||
if (log.isFine()) log.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
|
||||
if ( (this.sb.peers.lastSeedUpload_myIP.equals(this.sb.peers.mySeed().getIP()))
|
||||
&& (this.sb.peers.lastSeedUpload_seedDBSize == this.sb.peers.sizeConnected())
|
||||
&& (canReachMyself())
|
||||
&& (System.currentTimeMillis() - this.sb.peers.lastSeedUpload_timeStamp < 1000 * 60 * 60 * 24)
|
||||
&& (this.sb.peers.mySeed().isPrincipal()) ) {
|
||||
if ( log.isFine() ) {
|
||||
log
|
||||
.logFine("yacyCore.publishSeedList: not necessary to publish: oldIP is equal, sizeConnected is equal and I can reach myself under the old IP.");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// getting the seed upload method that should be used ...
|
||||
final String seedUploadMethod = this.sb.getConfig("seedUploadMethod", "");
|
||||
|
||||
if (
|
||||
(!seedUploadMethod.equalsIgnoreCase("none")) ||
|
||||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0)) ||
|
||||
((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0))
|
||||
) {
|
||||
if ( (!seedUploadMethod.equalsIgnoreCase("none"))
|
||||
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFTPPassword", "").length() > 0))
|
||||
|| ((seedUploadMethod.equals("")) && (this.sb.getConfig("seedFilePath", "").length() > 0)) ) {
|
||||
if ( seedUploadMethod.equals("") ) {
|
||||
if ( this.sb.getConfig("seedFTPPassword", "").length() > 0 ) {
|
||||
this.sb.setConfig("seedUploadMethod", "Ftp");
|
||||
|
@ -170,13 +174,16 @@ public class Network {
|
|||
if ( seedUploadMethod.equals("") ) {
|
||||
this.sb.setConfig("seedUploadMethod", "none");
|
||||
}
|
||||
if (log.isFine()) log.logFine("yacyCore.publishSeedList: No uploading method configured");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("yacyCore.publishSeedList: No uploading method configured");
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
public final void peerPing() {
|
||||
if ((this.sb.isRobinsonMode()) && (this.sb.getConfig("cluster.mode", "").equals("privatepeer"))) {
|
||||
if ( (this.sb.isRobinsonMode())
|
||||
&& (this.sb.getConfig(SwitchboardConstants.CLUSTER_MODE, "").equals(SwitchboardConstants.CLUSTER_MODE_PRIVATE_PEER)) ) {
|
||||
// in case this peer is a privat peer we omit the peer ping
|
||||
// all other robinson peer types do a peer ping:
|
||||
// the privatecluster does the ping to the other cluster members
|
||||
|
@ -192,11 +199,17 @@ public class Network {
|
|||
if ( this.sb.peers.sizeConnected() == 0 ) {
|
||||
// reload the seed lists
|
||||
this.sb.loadSeedLists();
|
||||
log.logInfo("re-initialized seed list. received " + this.sb.peers.sizeConnected() + " new peer(s)");
|
||||
log.logInfo("re-initialized seed list. received "
|
||||
+ this.sb.peers.sizeConnected()
|
||||
+ " new peer(s)");
|
||||
}
|
||||
final int newSeeds = publishMySeed(false);
|
||||
if ( newSeeds > 0 ) {
|
||||
log.logInfo("received " + newSeeds + " new peer(s), know a total of " + this.sb.peers.sizeConnected() + " different peers");
|
||||
log.logInfo("received "
|
||||
+ newSeeds
|
||||
+ " new peer(s), know a total of "
|
||||
+ this.sb.peers.sizeConnected()
|
||||
+ " different peers");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -215,16 +228,22 @@ public class Network {
|
|||
}
|
||||
|
||||
// use our own formatter to prevent concurrency locks with other processes
|
||||
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second);
|
||||
private final static GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(
|
||||
GenericFormatter.FORMAT_SHORT_SECOND,
|
||||
GenericFormatter.time_second);
|
||||
|
||||
protected class publishThread extends Thread {
|
||||
protected class publishThread extends Thread
|
||||
{
|
||||
int added;
|
||||
private final Seed seed;
|
||||
private final Semaphore sync;
|
||||
private final List<Thread> syncList;
|
||||
|
||||
public publishThread(final ThreadGroup tg, final Seed seed,
|
||||
final Semaphore sync, final List<Thread> syncList) throws InterruptedException {
|
||||
public publishThread(
|
||||
final ThreadGroup tg,
|
||||
final Seed seed,
|
||||
final Semaphore sync,
|
||||
final List<Thread> syncList) throws InterruptedException {
|
||||
super(tg, "PublishSeed_" + seed.getName());
|
||||
|
||||
this.sync = sync;
|
||||
|
@ -235,50 +254,106 @@ public class Network {
|
|||
this.added = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void run() {
|
||||
try {
|
||||
this.added = Protocol.hello(Network.this.sb.peers.mySeed(), Network.this.sb.peers.peerActions, this.seed.getClusterAddress(), this.seed.hash, this.seed.getName());
|
||||
this.added =
|
||||
Protocol.hello(
|
||||
Network.this.sb.peers.mySeed(),
|
||||
Network.this.sb.peers.peerActions,
|
||||
this.seed.getClusterAddress(),
|
||||
this.seed.hash,
|
||||
this.seed.getName());
|
||||
if ( this.added < 0 ) {
|
||||
// no or wrong response, delete that address
|
||||
final String cause = "peer ping to peer resulted in error response (added < 0)";
|
||||
log.logInfo("publish: disconnected " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' from " + this.seed.getPublicAddress() + ": " + cause);
|
||||
log.logInfo("publish: disconnected "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' from "
|
||||
+ this.seed.getPublicAddress()
|
||||
+ ": "
|
||||
+ cause);
|
||||
Network.this.sb.peers.peerActions.peerDeparture(this.seed, cause);
|
||||
} else {
|
||||
// success! we have published our peer to a senior peer
|
||||
// update latest news from the other peer
|
||||
log.logInfo("publish: handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress());
|
||||
log.logInfo("publish: handshaked "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' at "
|
||||
+ this.seed.getPublicAddress());
|
||||
// check if seed's lastSeen has been updated
|
||||
final Seed newSeed = Network.this.sb.peers.getConnected(this.seed.hash);
|
||||
if ( newSeed != null ) {
|
||||
if ( !newSeed.isOnline() ) {
|
||||
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
|
||||
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " is not online." +
|
||||
" Removing Peer from connected");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("publish: recently handshaked "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' at "
|
||||
+ this.seed.getPublicAddress()
|
||||
+ " is not online."
|
||||
+ " Removing Peer from connected");
|
||||
}
|
||||
Network.this.sb.peers.peerActions.peerDeparture(newSeed, "peer not online");
|
||||
} else
|
||||
if (newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000)) {
|
||||
} else if ( newSeed.getLastSeenUTC() < (System.currentTimeMillis() - 10000) ) {
|
||||
// update last seed date
|
||||
if ( newSeed.getLastSeenUTC() >= this.seed.getLastSeenUTC() ) {
|
||||
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
|
||||
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
|
||||
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "'");
|
||||
if ( log.isFine() ) {
|
||||
log
|
||||
.logFine("publish: recently handshaked "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' at "
|
||||
+ this.seed.getPublicAddress()
|
||||
+ " with old LastSeen: '"
|
||||
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
|
||||
.getLastSeenUTC())) + "'");
|
||||
}
|
||||
newSeed.setLastSeenUTC();
|
||||
Network.this.sb.peers.peerActions.peerArrival(newSeed, true);
|
||||
} else {
|
||||
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) +
|
||||
" peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " with old LastSeen: '" +
|
||||
my_SHORT_SECOND_FORMATTER.format(new Date(newSeed.getLastSeenUTC())) + "', this is more recent: '" +
|
||||
my_SHORT_SECOND_FORMATTER.format(new Date(this.seed.getLastSeenUTC())) + "'");
|
||||
if ( log.isFine() ) {
|
||||
log
|
||||
.logFine("publish: recently handshaked "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' at "
|
||||
+ this.seed.getPublicAddress()
|
||||
+ " with old LastSeen: '"
|
||||
+ my_SHORT_SECOND_FORMATTER.format(new Date(newSeed
|
||||
.getLastSeenUTC()))
|
||||
+ "', this is more recent: '"
|
||||
+ my_SHORT_SECOND_FORMATTER.format(new Date(this.seed
|
||||
.getLastSeenUTC()))
|
||||
+ "'");
|
||||
}
|
||||
this.seed.setLastSeenUTC();
|
||||
Network.this.sb.peers.peerActions.peerArrival(this.seed, true);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (log.isFine()) log.logFine("publish: recently handshaked " + this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR) + " peer '" + this.seed.getName() + "' at " + this.seed.getPublicAddress() + " not in connectedDB");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("publish: recently handshaked "
|
||||
+ this.seed.get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR)
|
||||
+ " peer '"
|
||||
+ this.seed.getName()
|
||||
+ "' at "
|
||||
+ this.seed.getPublicAddress()
|
||||
+ " not in connectedDB");
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch ( final Exception e ) {
|
||||
log.logSevere("publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(), e);
|
||||
log.logSevere(
|
||||
"publishThread: error with target seed " + this.seed.toString() + ": " + e.getMessage(),
|
||||
e);
|
||||
} finally {
|
||||
this.syncList.add(this);
|
||||
this.sync.release();
|
||||
|
@ -311,9 +386,12 @@ public class Network {
|
|||
|
||||
// getting a list of peers to contact
|
||||
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN).equals(Seed.PEERTYPE_VIRGIN) ) {
|
||||
if (attempts > PING_INITIAL) { attempts = PING_INITIAL; }
|
||||
if ( attempts > PING_INITIAL ) {
|
||||
attempts = PING_INITIAL;
|
||||
}
|
||||
final Map<byte[], String> ch = Switchboard.getSwitchboard().clusterhashes;
|
||||
seeds = PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
|
||||
seeds =
|
||||
PeerSelection.seedsByAge(this.sb.peers, true, attempts - ((ch == null) ? 0 : ch.size())); // best for fast connection
|
||||
// add also all peers from cluster if this is a public robinson cluster
|
||||
if ( ch != null ) {
|
||||
final Iterator<Map.Entry<byte[], String>> i = ch.entrySet().iterator();
|
||||
|
@ -326,7 +404,9 @@ public class Network {
|
|||
seed = seeds.get(hash);
|
||||
if ( seed == null ) {
|
||||
seed = this.sb.peers.get(hash);
|
||||
if (seed == null) continue;
|
||||
if ( seed == null ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
seed.setAlternativeAddress(entry.getValue());
|
||||
seeds.put(hash, seed);
|
||||
|
@ -336,15 +416,23 @@ public class Network {
|
|||
int diff = PING_MIN_DBSIZE - amIAccessibleDB.size();
|
||||
if ( diff > PING_MIN_RUNNING ) {
|
||||
diff = Math.min(diff, PING_MAX_RUNNING);
|
||||
if (attempts > diff) { attempts = diff; }
|
||||
if ( attempts > diff ) {
|
||||
attempts = diff;
|
||||
}
|
||||
} else {
|
||||
if (attempts > PING_MIN_RUNNING) { attempts = PING_MIN_RUNNING; }
|
||||
if ( attempts > PING_MIN_RUNNING ) {
|
||||
attempts = PING_MIN_RUNNING;
|
||||
}
|
||||
}
|
||||
seeds = PeerSelection.seedsByAge(this.sb.peers, false, attempts); // best for seed list maintenance/cleaning
|
||||
}
|
||||
|
||||
if (seeds == null || seeds.isEmpty()) { return 0; }
|
||||
if (seeds.size() < attempts) { attempts = seeds.size(); }
|
||||
if ( seeds == null || seeds.isEmpty() ) {
|
||||
return 0;
|
||||
}
|
||||
if ( seeds.size() < attempts ) {
|
||||
attempts = seeds.size();
|
||||
}
|
||||
|
||||
// This will try to get Peers that are not currently in amIAccessibleDB
|
||||
final Iterator<Seed> si = seeds.values().iterator();
|
||||
|
@ -380,11 +468,16 @@ public class Network {
|
|||
i++;
|
||||
|
||||
final String address = seed.getClusterAddress();
|
||||
if (log.isFine()) log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("HELLO #" + i + " to peer '" + seed.get(Seed.NAME, "") + "' at " + address); // debug
|
||||
}
|
||||
final String seederror = seed.isProper(false);
|
||||
if ( (address == null) || (seederror != null) ) {
|
||||
// we don't like that address, delete it
|
||||
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = " + address + "; seederror = " + seederror);
|
||||
this.sb.peers.peerActions.peerDeparture(seed, "peer ping to peer resulted in address = "
|
||||
+ address
|
||||
+ "; seederror = "
|
||||
+ seederror);
|
||||
sync.acquire();
|
||||
} else {
|
||||
// starting a new publisher thread
|
||||
|
@ -438,16 +531,24 @@ public class Network {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (log.isFine()) log.logFine("DBSize before -> after Cleanup: " + dbSize + " -> " + amIAccessibleDB.size());
|
||||
if ( log.isFine() ) {
|
||||
log
|
||||
.logFine("DBSize before -> after Cleanup: "
|
||||
+ dbSize
|
||||
+ " -> "
|
||||
+ amIAccessibleDB.size());
|
||||
}
|
||||
log.logInfo("PeerPing: I am accessible for " + accessible +
|
||||
" peer(s), not accessible for " + notaccessible + " peer(s).");
|
||||
}
|
||||
log.logInfo("PeerPing: I am accessible for "
|
||||
+ accessible
|
||||
+ " peer(s), not accessible for "
|
||||
+ notaccessible
|
||||
+ " peer(s).");
|
||||
|
||||
if ( (accessible + notaccessible) > 0 ) {
|
||||
final String newPeerType;
|
||||
// At least one other Peer told us our type
|
||||
if ((accessible >= PING_MIN_PEERSEEN) ||
|
||||
(accessible >= notaccessible)) {
|
||||
if ( (accessible >= PING_MIN_PEERSEEN) || (accessible >= notaccessible) ) {
|
||||
// We can be reached from a majority of other Peers
|
||||
if ( this.sb.peers.mySeed().isPrincipal() ) {
|
||||
newPeerType = Seed.PEERTYPE_PRINCIPAL;
|
||||
|
@ -461,7 +562,11 @@ public class Network {
|
|||
if ( this.sb.peers.mySeed().orVirgin().equals(newPeerType) ) {
|
||||
log.logInfo("PeerPing: myType is " + this.sb.peers.mySeed().orVirgin());
|
||||
} else {
|
||||
log.logInfo("PeerPing: changing myType from '" + this.sb.peers.mySeed().orVirgin() + "' to '" + newPeerType + "'");
|
||||
log.logInfo("PeerPing: changing myType from '"
|
||||
+ this.sb.peers.mySeed().orVirgin()
|
||||
+ "' to '"
|
||||
+ newPeerType
|
||||
+ "'");
|
||||
this.sb.peers.mySeed().put(Seed.PEERTYPE, newPeerType);
|
||||
}
|
||||
} else {
|
||||
|
@ -474,8 +579,12 @@ public class Network {
|
|||
this.sb.peers.saveMySeed();
|
||||
|
||||
// if we have an address, we do nothing
|
||||
if (this.sb.peers.mySeed().isProper(true) == null && !force) { return 0; }
|
||||
if (newSeeds > 0) return newSeeds;
|
||||
if ( this.sb.peers.mySeed().isProper(true) == null && !force ) {
|
||||
return 0;
|
||||
}
|
||||
if ( newSeeds > 0 ) {
|
||||
return newSeeds;
|
||||
}
|
||||
|
||||
// still no success: ask own NAT or internet responder
|
||||
//final boolean DI604use = switchboard.getConfig("DI604use", "false").equals("true");
|
||||
|
@ -484,11 +593,16 @@ public class Network {
|
|||
//if (ip.equals("")) ip = natLib.retrieveIP(DI604use, DI604pw);
|
||||
|
||||
// yacyCore.log.logDebug("DEBUG: new IP=" + ip);
|
||||
if (Seed.isProperIP(ip) == null) this.sb.peers.mySeed().setIP(ip);
|
||||
if (this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR)) // ???????????????
|
||||
if ( Seed.isProperIP(ip) == null ) {
|
||||
this.sb.peers.mySeed().setIP(ip);
|
||||
}
|
||||
if ( this.sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR).equals(Seed.PEERTYPE_JUNIOR) ) {
|
||||
this.sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); // to start bootstraping, we need to be recognised as PEERTYPE_SENIOR peer
|
||||
log.logInfo("publish: no recipient found, our address is " +
|
||||
((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers.mySeed().getPublicAddress()));
|
||||
}
|
||||
log.logInfo("publish: no recipient found, our address is "
|
||||
+ ((this.sb.peers.mySeed().getPublicAddress() == null) ? "unknown" : this.sb.peers
|
||||
.mySeed()
|
||||
.getPublicAddress()));
|
||||
this.sb.peers.saveMySeed();
|
||||
return 0;
|
||||
} catch ( final InterruptedException e ) {
|
||||
|
@ -499,11 +613,16 @@ public class Network {
|
|||
Thread.interrupted();
|
||||
|
||||
// interrupt all already started publishThreads
|
||||
log.logInfo("publish: Signaling shutdown to " + Network.publishThreadGroup.activeCount() + " remaining publishing threads ...");
|
||||
log.logInfo("publish: Signaling shutdown to "
|
||||
+ Network.publishThreadGroup.activeCount()
|
||||
+ " remaining publishing threads ...");
|
||||
Network.publishThreadGroup.interrupt();
|
||||
|
||||
// waiting some time for the publishThreads to finish execution
|
||||
try { Thread.sleep(500); } catch (final InterruptedException ex) {}
|
||||
try {
|
||||
Thread.sleep(500);
|
||||
} catch ( final InterruptedException ex ) {
|
||||
}
|
||||
|
||||
// getting the amount of remaining publishing threads
|
||||
int threadCount = Network.publishThreadGroup.activeCount();
|
||||
|
@ -511,20 +630,33 @@ public class Network {
|
|||
threadCount = Network.publishThreadGroup.enumerate(threadList);
|
||||
|
||||
// we need to use a timeout here because the session threads are not interruptible ...
|
||||
if (log.isFine()) log.logFine("publish: Waiting for " + Network.publishThreadGroup.activeCount() + " remaining publishing threads to finish shutdown ...");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("publish: Waiting for "
|
||||
+ Network.publishThreadGroup.activeCount()
|
||||
+ " remaining publishing threads to finish shutdown ...");
|
||||
}
|
||||
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
|
||||
final Thread currentThread = threadList[currentThreadIdx];
|
||||
|
||||
if ( currentThread.isAlive() ) {
|
||||
if (log.isFine()) log.logFine("publish: Waiting for remaining publishing thread '" + currentThread.getName() + "' to finish shutdown");
|
||||
try { currentThread.join(500); } catch (final InterruptedException ex) {}
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("publish: Waiting for remaining publishing thread '"
|
||||
+ currentThread.getName()
|
||||
+ "' to finish shutdown");
|
||||
}
|
||||
try {
|
||||
currentThread.join(500);
|
||||
} catch ( final InterruptedException ex ) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.logInfo("publish: Shutdown off all remaining publishing thread finished.");
|
||||
|
||||
} catch ( final Exception ee ) {
|
||||
log.logWarning("publish: Unexpected error while trying to shutdown all remaining publishing threads.", e);
|
||||
log.logWarning(
|
||||
"publish: Unexpected error while trying to shutdown all remaining publishing threads.",
|
||||
e);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -546,7 +678,9 @@ public class Network {
|
|||
}
|
||||
}
|
||||
|
||||
if (className == null) { return null; }
|
||||
if ( className == null ) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
final Class<?> uploaderClass = Class.forName(className);
|
||||
final Object uploader = uploaderClass.newInstance();
|
||||
|
@ -559,17 +693,30 @@ public class Network {
|
|||
public static void loadSeedUploadMethods() {
|
||||
yacySeedUploader uploader;
|
||||
uploader = new yacySeedUploadFile();
|
||||
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
Network.seedUploadMethods.put(uploader
|
||||
.getClass()
|
||||
.getSimpleName()
|
||||
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
uploader = new yacySeedUploadFtp();
|
||||
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
Network.seedUploadMethods.put(uploader
|
||||
.getClass()
|
||||
.getSimpleName()
|
||||
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
uploader = new yacySeedUploadScp();
|
||||
Network.seedUploadMethods.put(uploader.getClass().getSimpleName().substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
Network.seedUploadMethods.put(uploader
|
||||
.getClass()
|
||||
.getSimpleName()
|
||||
.substring("yacySeedUpload".length()), uploader.getClass().getCanonicalName());
|
||||
}
|
||||
|
||||
public static boolean changeSeedUploadMethod(final String method) {
|
||||
if (method == null || method.length() == 0) { return false; }
|
||||
if ( method == null || method.length() == 0 ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (method.equalsIgnoreCase("none")) { return true; }
|
||||
if ( method.equalsIgnoreCase("none") ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
synchronized ( Network.seedUploadMethods ) {
|
||||
return Network.seedUploadMethods.containsKey(method);
|
||||
|
@ -592,27 +739,30 @@ public class Network {
|
|||
String seedUploadMethod = sb.getConfig("seedUploadMethod", "");
|
||||
|
||||
// for backward compatibility ....
|
||||
if (seedUploadMethod.equalsIgnoreCase("Ftp") ||
|
||||
(seedUploadMethod.equals("") &&
|
||||
sb.getConfig("seedFTPPassword", "").length() > 0)) {
|
||||
if ( seedUploadMethod.equalsIgnoreCase("Ftp")
|
||||
|| (seedUploadMethod.equals("") && sb.getConfig("seedFTPPassword", "").length() > 0) ) {
|
||||
|
||||
seedUploadMethod = "Ftp";
|
||||
sb.setConfig("seedUploadMethod", seedUploadMethod);
|
||||
|
||||
} else if (seedUploadMethod.equalsIgnoreCase("File") ||
|
||||
(seedUploadMethod.equals("") &&
|
||||
sb.getConfig("seedFilePath", "").length() > 0)) {
|
||||
} else if ( seedUploadMethod.equalsIgnoreCase("File")
|
||||
|| (seedUploadMethod.equals("") && sb.getConfig("seedFilePath", "").length() > 0) ) {
|
||||
|
||||
seedUploadMethod = "File";
|
||||
sb.setConfig("seedUploadMethod", seedUploadMethod);
|
||||
}
|
||||
|
||||
// determine the seed uploader that should be used ...
|
||||
if (seedUploadMethod.equalsIgnoreCase("none")) { return "no uploader specified"; }
|
||||
if ( seedUploadMethod.equalsIgnoreCase("none") ) {
|
||||
return "no uploader specified";
|
||||
}
|
||||
|
||||
final yacySeedUploader uploader = getSeedUploader(seedUploadMethod);
|
||||
if ( uploader == null ) {
|
||||
final String errorMsg = "Unable to get the proper uploader-class for seed uploading method '" + seedUploadMethod + "'.";
|
||||
final String errorMsg =
|
||||
"Unable to get the proper uploader-class for seed uploading method '"
|
||||
+ seedUploadMethod
|
||||
+ "'.";
|
||||
log.logWarning("SaveSeedList: " + errorMsg);
|
||||
return errorMsg;
|
||||
}
|
||||
|
@ -621,35 +771,51 @@ public class Network {
|
|||
DigestURI seedURL;
|
||||
try {
|
||||
final String seedURLStr = sb.peers.mySeed().get(Seed.SEEDLISTURL, "");
|
||||
if (seedURLStr.length() == 0) { throw new MalformedURLException("The seed-file url must not be empty."); }
|
||||
if (!(
|
||||
seedURLStr.toLowerCase().startsWith("http://") ||
|
||||
seedURLStr.toLowerCase().startsWith("https://")
|
||||
)) {
|
||||
if ( seedURLStr.length() == 0 ) {
|
||||
throw new MalformedURLException("The seed-file url must not be empty.");
|
||||
}
|
||||
if ( !(seedURLStr.toLowerCase().startsWith("http://") || seedURLStr.toLowerCase().startsWith(
|
||||
"https://")) ) {
|
||||
throw new MalformedURLException("Unsupported protocol.");
|
||||
}
|
||||
seedURL = new DigestURI(seedURLStr);
|
||||
} catch ( final MalformedURLException e ) {
|
||||
final String errorMsg = "Malformed seed file URL '" + sb.peers.mySeed().get(Seed.SEEDLISTURL, "") + "'. " + e.getMessage();
|
||||
final String errorMsg =
|
||||
"Malformed seed file URL '"
|
||||
+ sb.peers.mySeed().get(Seed.SEEDLISTURL, "")
|
||||
+ "'. "
|
||||
+ e.getMessage();
|
||||
log.logWarning("SaveSeedList: " + errorMsg);
|
||||
return errorMsg;
|
||||
}
|
||||
|
||||
// upload the seed-list using the configured uploader class
|
||||
String prevStatus = sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
|
||||
if (prevStatus.equals(Seed.PEERTYPE_PRINCIPAL)) { prevStatus = Seed.PEERTYPE_SENIOR; }
|
||||
if ( prevStatus.equals(Seed.PEERTYPE_PRINCIPAL) ) {
|
||||
prevStatus = Seed.PEERTYPE_SENIOR;
|
||||
}
|
||||
|
||||
try {
|
||||
sb.peers.mySeed().put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); // this information shall also be uploaded
|
||||
|
||||
if (log.isFine()) log.logFine("SaveSeedList: Using seed uploading method '" + seedUploadMethod + "' for seed-list uploading." +
|
||||
"\n\tPrevious peerType is '" + sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR) + "'.");
|
||||
if ( log.isFine() ) {
|
||||
log.logFine("SaveSeedList: Using seed uploading method '"
|
||||
+ seedUploadMethod
|
||||
+ "' for seed-list uploading."
|
||||
+ "\n\tPrevious peerType is '"
|
||||
+ sb.peers.mySeed().get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR)
|
||||
+ "'.");
|
||||
}
|
||||
|
||||
logt = sb.peers.uploadSeedList(uploader, sb, sb.peers, seedURL);
|
||||
if ( logt != null ) {
|
||||
if ( logt.indexOf("Error", 0) >= 0 ) {
|
||||
sb.peers.mySeed().put(Seed.PEERTYPE, prevStatus);
|
||||
final String errorMsg = "SaveSeedList: seed upload failed using " + uploader.getClass().getName() + " (error): " + logt.substring(logt.indexOf("Error",0) + 6);
|
||||
final String errorMsg =
|
||||
"SaveSeedList: seed upload failed using "
|
||||
+ uploader.getClass().getName()
|
||||
+ " (error): "
|
||||
+ logt.substring(logt.indexOf("Error", 0) + 6);
|
||||
log.logSevere(errorMsg);
|
||||
return errorMsg;
|
||||
}
|
||||
|
|
|
@ -77,7 +77,8 @@ import net.yacy.search.Switchboard;
|
|||
import de.anomic.tools.bitfield;
|
||||
import de.anomic.tools.crypt;
|
||||
|
||||
public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
||||
public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed>
|
||||
{
|
||||
|
||||
public static String ANON_PREFIX = "_anon";
|
||||
|
||||
|
@ -186,7 +187,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
this.hash = theHash;
|
||||
this.dna = theDna;
|
||||
final String flags = this.dna.get(Seed.FLAGS);
|
||||
if ((flags == null) || (flags.length() != 4)) { this.dna.put(Seed.FLAGS, Seed.FLAGSZERO); }
|
||||
if ( (flags == null) || (flags.length() != 4) ) {
|
||||
this.dna.put(Seed.FLAGS, Seed.FLAGSZERO);
|
||||
}
|
||||
this.dna.put(Seed.NAME, checkPeerName(get(Seed.NAME, "∅")));
|
||||
this.birthdate = -1; // this means 'not yet parsed', parse that later when it is used
|
||||
}
|
||||
|
@ -243,10 +246,12 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* check the peer name: protect against usage as XSS hack
|
||||
*
|
||||
* @param name the peer name to check
|
||||
* @return a checked name without "<" and ">"
|
||||
*/
|
||||
private final static Pattern tp = Pattern.compile("<|>");
|
||||
|
||||
public static String checkPeerName(String name) {
|
||||
name = tp.matcher(name).replaceAll("_");
|
||||
return name;
|
||||
|
@ -254,14 +259,21 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* generate a default peer name
|
||||
*
|
||||
* @return the generated default peer name
|
||||
*/
|
||||
private static String defaultPeerName() {
|
||||
return ANON_PREFIX + OS.infoKey() + "-" + (System.currentTimeMillis() % 77777777L) + "-" + Network.speedKey;
|
||||
return ANON_PREFIX
|
||||
+ OS.infoKey()
|
||||
+ "-"
|
||||
+ (System.currentTimeMillis() % 77777777L)
|
||||
+ "-"
|
||||
+ Network.speedKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for the static fragments of a generated default peer name, such as the string 'dpn'
|
||||
*
|
||||
* @see #makeDefaultPeerName()
|
||||
* @param name the peer name to check for default peer name compliance
|
||||
* @return whether the given peer name may be a default generated peer name
|
||||
|
@ -271,92 +283,151 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
}
|
||||
|
||||
/**
|
||||
* used when doing routing within a cluster; this can assign a ip and a port
|
||||
* that is used instead the address stored in the seed DNA
|
||||
* used when doing routing within a cluster; this can assign a ip and a port that is used instead the
|
||||
* address stored in the seed DNA
|
||||
*/
|
||||
public void setAlternativeAddress(final String ipport) {
|
||||
if (ipport == null) return;
|
||||
if ( ipport == null ) {
|
||||
return;
|
||||
}
|
||||
final int p = ipport.indexOf(':');
|
||||
if (p < 0) this.alternativeIP = ipport; else this.alternativeIP = ipport.substring(0, p);
|
||||
if ( p < 0 ) {
|
||||
this.alternativeIP = ipport;
|
||||
} else {
|
||||
this.alternativeIP = ipport.substring(0, p);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the IP<br>
|
||||
*
|
||||
* @return the stored IP, or "127.0.0.1" if no IP is stored or it is empty (never null)
|
||||
*/
|
||||
public final String getIP() {
|
||||
final String ip = get(Seed.IP, "127.0.0.1");
|
||||
return (ip == null || ip.length() == 0) ? "127.0.0.1" : ip;
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the peertype<br>
|
||||
*
|
||||
* @return the peertype, or an empty string if none is stored
|
||||
*/
|
||||
public final String getPeerType() { return get(Seed.PEERTYPE, ""); }
|
||||
public final String getPeerType() {
|
||||
return get(Seed.PEERTYPE, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the peertype<br>
|
||||
*
|
||||
* @return the peertype or "virgin"
|
||||
*/
|
||||
public final String orVirgin() { return get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN); }
|
||||
public final String orVirgin() {
|
||||
return get(Seed.PEERTYPE, Seed.PEERTYPE_VIRGIN);
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the peertype<br>
|
||||
*
|
||||
* @return the peertype or "junior"
|
||||
*/
|
||||
public final String orJunior() { return get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR); }
|
||||
public final String orJunior() {
|
||||
return get(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the peertype<br>
|
||||
*
|
||||
* @return the peertype or "senior"
|
||||
*/
|
||||
public final String orSenior() { return get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); }
|
||||
public final String orSenior() {
|
||||
return get(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* try to get the peertype<br>
|
||||
*
|
||||
* @return the peertype or "principal"
|
||||
*/
|
||||
public final String orPrincipal() { return get(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); }
|
||||
public final String orPrincipal() {
|
||||
return get(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the peer's DNA (its set of peer defining values, e.g. IP, name, version, ...)
|
||||
*
|
||||
* @param key the key for the value to fetch
|
||||
* @param dflt the default value
|
||||
*/
|
||||
public final String get(final String key, final String dflt) {
|
||||
final Object o = this.dna.get(key);
|
||||
if (o == null) { return dflt; }
|
||||
if ( o == null ) {
|
||||
return dflt;
|
||||
}
|
||||
return (String) o;
|
||||
}
|
||||
|
||||
public final float getFloat(final String key, final float dflt) {
|
||||
final Object o = this.dna.get(key);
|
||||
if (o == null) { return dflt; }
|
||||
if (o instanceof String) try {
|
||||
if ( o == null ) {
|
||||
return dflt;
|
||||
}
|
||||
if ( o instanceof String ) {
|
||||
try {
|
||||
return Float.parseFloat((String) o);
|
||||
} catch ( final NumberFormatException e ) {
|
||||
return dflt;
|
||||
}
|
||||
} else if ( o instanceof Float ) {
|
||||
return ((Float) o).floatValue();
|
||||
} else return dflt;
|
||||
} else {
|
||||
return dflt;
|
||||
}
|
||||
}
|
||||
|
||||
public final long getLong(final String key, final long dflt) {
|
||||
final Object o = this.dna.get(key);
|
||||
if (o == null) { return dflt; }
|
||||
if (o instanceof String) try {
|
||||
if ( o == null ) {
|
||||
return dflt;
|
||||
}
|
||||
if ( o instanceof String ) {
|
||||
try {
|
||||
return Long.parseLong((String) o);
|
||||
} catch ( final NumberFormatException e ) {
|
||||
return dflt;
|
||||
}
|
||||
} else if ( o instanceof Long ) {
|
||||
return ((Long) o).longValue();
|
||||
} else if ( o instanceof Integer ) {
|
||||
return ((Integer) o).intValue();
|
||||
} else return dflt;
|
||||
} else {
|
||||
return dflt;
|
||||
}
|
||||
}
|
||||
|
||||
public final void setIP(final String ip) { this.dna.put(Seed.IP, ip); }
|
||||
public final void setPort(final String port) { this.dna.put(Seed.PORT, port); }
|
||||
public final void setType(final String type) { this.dna.put(Seed.PEERTYPE, type); }
|
||||
public final void setJunior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR); }
|
||||
public final void setSenior() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR); }
|
||||
public final void setPrincipal() { this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL); }
|
||||
public final void setIP(final String ip) {
|
||||
this.dna.put(Seed.IP, ip);
|
||||
}
|
||||
|
||||
public final void setPort(final String port) {
|
||||
this.dna.put(Seed.PORT, port);
|
||||
}
|
||||
|
||||
public final void setType(final String type) {
|
||||
this.dna.put(Seed.PEERTYPE, type);
|
||||
}
|
||||
|
||||
public final void setJunior() {
|
||||
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_JUNIOR);
|
||||
}
|
||||
|
||||
public final void setSenior() {
|
||||
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_SENIOR);
|
||||
}
|
||||
|
||||
public final void setPrincipal() {
|
||||
this.dna.put(Seed.PEERTYPE, Seed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
||||
public final void put(final String key, final String value) {
|
||||
synchronized ( this.dna ) {
|
||||
|
@ -385,25 +456,33 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
public final void incSI(final int count) {
|
||||
String v = this.dna.get(Seed.INDEX_OUT);
|
||||
if (v == null) { v = Seed.ZERO; }
|
||||
if ( v == null ) {
|
||||
v = Seed.ZERO;
|
||||
}
|
||||
this.dna.put(Seed.INDEX_OUT, Long.toString(Long.parseLong(v) + count));
|
||||
}
|
||||
|
||||
public final void incRI(final int count) {
|
||||
String v = this.dna.get(Seed.INDEX_IN);
|
||||
if (v == null) { v = Seed.ZERO; }
|
||||
if ( v == null ) {
|
||||
v = Seed.ZERO;
|
||||
}
|
||||
this.dna.put(Seed.INDEX_IN, Long.toString(Long.parseLong(v) + count));
|
||||
}
|
||||
|
||||
public final void incSU(final int count) {
|
||||
String v = this.dna.get(Seed.URL_OUT);
|
||||
if (v == null) { v = Seed.ZERO; }
|
||||
if ( v == null ) {
|
||||
v = Seed.ZERO;
|
||||
}
|
||||
this.dna.put(Seed.URL_OUT, Long.toString(Long.parseLong(v) + count));
|
||||
}
|
||||
|
||||
public final void incRU(final int count) {
|
||||
String v = this.dna.get(Seed.URL_IN);
|
||||
if (v == null) { v = Seed.ZERO; }
|
||||
if ( v == null ) {
|
||||
v = Seed.ZERO;
|
||||
}
|
||||
this.dna.put(Seed.URL_IN, Long.toString(Long.parseLong(v) + count));
|
||||
}
|
||||
|
||||
|
@ -416,11 +495,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* <code>12 * 6 bit = 72 bit = 24</code> characters octal-hash
|
||||
* <p>Octal hashes are used for cache-dumps that are DHT-ready</p>
|
||||
* <p>
|
||||
* Cause: the natural order of octal hashes are the same as the b64-order of b64Hashes.
|
||||
* a hexhash cannot be used in such cases, and b64Hashes are not appropriate for file names
|
||||
* Octal hashes are used for cache-dumps that are DHT-ready
|
||||
* </p>
|
||||
* <p>
|
||||
* Cause: the natural order of octal hashes are the same as the b64-order of b64Hashes. a hexhash cannot
|
||||
* be used in such cases, and b64Hashes are not appropriate for file names
|
||||
* </p>
|
||||
*
|
||||
* @param b64Hash a base64 hash
|
||||
* @return the octal representation of the given base64 hash
|
||||
*/
|
||||
|
@ -430,11 +512,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* <code>12 * 6 bit = 72 bit = 18</code> characters hex-hash
|
||||
*
|
||||
* @param b64Hash a base64 hash
|
||||
* @return the hexadecimal representation of the given base64 hash
|
||||
*/
|
||||
public static String b64Hash2hexHash(final String b64Hash) {
|
||||
if (b64Hash.length() > 12) return "";
|
||||
if ( b64Hash.length() > 12 ) {
|
||||
return "";
|
||||
}
|
||||
// the hash string represents 12 * 6 bit = 72 bits. This is too much for a long integer.
|
||||
return Digest.encodeHex(Base64Order.enhancedCoder.decode(b64Hash));
|
||||
}
|
||||
|
@ -449,6 +534,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* The returned version follows this pattern: <code>MAJORVERSION . MINORVERSION 0 SVN REVISION</code>
|
||||
*
|
||||
* @return the YaCy version of this peer as a float or <code>0</code> if no valid value could be retrieved
|
||||
* from this yacySeed object
|
||||
*/
|
||||
|
@ -462,6 +548,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* get the SVN version of the peer
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public final int getRevision() {
|
||||
|
@ -474,10 +561,14 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
*/
|
||||
public final String getPublicAddress() {
|
||||
String ip = getIP();
|
||||
if (ip == null || ip.length() < 8 || ip.length() > 60) ip = "127.0.0.1";
|
||||
if ( ip == null || ip.length() < 8 || ip.length() > 60 ) {
|
||||
ip = "127.0.0.1";
|
||||
}
|
||||
|
||||
final String port = this.dna.get(Seed.PORT);
|
||||
if (port == null || port.length() < 2 || port.length() > 5) return null;
|
||||
if ( port == null || port.length() < 2 || port.length() > 5 ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final StringBuilder sb = new StringBuilder(ip.length() + port.length() + 1);
|
||||
sb.append(ip);
|
||||
|
@ -488,16 +579,21 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* If this seed is part of a cluster, the peer has probably the {@linkplain #alternativeIP} object set to
|
||||
* a local IP. If this is present and the public IP of this peer is identical to the public IP of the own seed,
|
||||
* construct an address using this IP; otherwise return the public address
|
||||
* a local IP. If this is present and the public IP of this peer is identical to the public IP of the own
|
||||
* seed, construct an address using this IP; otherwise return the public address
|
||||
*
|
||||
* @see #getPublicAddress()
|
||||
* @return the alternative IP:port if present, else the public address
|
||||
*/
|
||||
public final String getClusterAddress() {
|
||||
if (this.alternativeIP == null) return getPublicAddress();
|
||||
if ( this.alternativeIP == null ) {
|
||||
return getPublicAddress();
|
||||
}
|
||||
|
||||
final String port = this.dna.get(Seed.PORT);
|
||||
if ((port == null) || (port.length() < 2)) return null;
|
||||
if ( (port == null) || (port.length() < 2) ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return this.alternativeIP + ":" + port;
|
||||
}
|
||||
|
@ -512,7 +608,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
/** @return the portnumber of this seed or <code>-1</code> if not present */
|
||||
public final int getPort() {
|
||||
final String port = this.dna.get(Seed.PORT);
|
||||
if (port == null) return -1;
|
||||
if ( port == null ) {
|
||||
return -1;
|
||||
}
|
||||
/*if (port.length() < 2) return -1; It is possible to use port 0-9*/
|
||||
return Integer.parseInt(port);
|
||||
}
|
||||
|
@ -522,8 +620,11 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
// because java thinks it must apply the UTC offset to the current time,
|
||||
// to create a string that looks like our current time, it adds the local UTC offset to the
|
||||
// time. To create a corrected UTC Date string, we first subtract the local UTC offset.
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
final String ls = my_SHORT_SECOND_FORMATTER.format(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/));
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER =
|
||||
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
final String ls =
|
||||
my_SHORT_SECOND_FORMATTER
|
||||
.format(new Date(System.currentTimeMillis() /*- DateFormatter.UTCDiff()*/));
|
||||
//System.out.println("SETTING LAST-SEEN of " + this.getName() + " to " + ls);
|
||||
this.dna.put(Seed.LASTSEEN, ls);
|
||||
}
|
||||
|
@ -533,7 +634,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
*/
|
||||
public final long getLastSeenUTC() {
|
||||
try {
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER =
|
||||
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
final long t = my_SHORT_SECOND_FORMATTER.parse(get(Seed.LASTSEEN, "20040101000000")).getTime();
|
||||
// getTime creates a UTC time number. But in this case java thinks, that the given
|
||||
// time string is a local time, which has a local UTC offset applied.
|
||||
|
@ -551,6 +653,7 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
/**
|
||||
* test if the lastSeen time of the seed has a time-out
|
||||
*
|
||||
* @param milliseconds the maximum age of the last-seen value
|
||||
* @return true, if the time between the last-seen time and now is greater then the given time-out
|
||||
*/
|
||||
|
@ -560,10 +663,13 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
}
|
||||
|
||||
public final long getBirthdate() {
|
||||
if (this.birthdate > 0) return this.birthdate;
|
||||
if ( this.birthdate > 0 ) {
|
||||
return this.birthdate;
|
||||
}
|
||||
long b;
|
||||
try {
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER = new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
final GenericFormatter my_SHORT_SECOND_FORMATTER =
|
||||
new GenericFormatter(GenericFormatter.FORMAT_SHORT_SECOND, GenericFormatter.time_second); // use our own formatter to prevent concurrency locks with other processes
|
||||
b = my_SHORT_SECOND_FORMATTER.parse(get(Seed.BDATE, "20040101000000")).getTime();
|
||||
} catch ( final ParseException e ) {
|
||||
b = System.currentTimeMillis();
|
||||
|
@ -587,11 +693,15 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
public boolean matchPeerTags(final HandleSet searchHashes) {
|
||||
final String peertags = get(PEERTAGS, "");
|
||||
if (peertags.equals("*")) return true;
|
||||
if ( peertags.equals("*") ) {
|
||||
return true;
|
||||
}
|
||||
final Set<String> tags = MapTools.string2set(peertags, "|");
|
||||
final Iterator<String> i = tags.iterator();
|
||||
while ( i.hasNext() ) {
|
||||
if (searchHashes.has(Word.word2hash(i.next()))) return true;
|
||||
if ( searchHashes.has(Word.word2hash(i.next())) ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -635,52 +745,79 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
private void setFlag(final int flag, final boolean value) {
|
||||
String flags = get(Seed.FLAGS, Seed.FLAGSZERO);
|
||||
if (flags.length() != 4) { flags = Seed.FLAGSZERO; }
|
||||
if ( flags.length() != 4 ) {
|
||||
flags = Seed.FLAGSZERO;
|
||||
}
|
||||
final bitfield f = new bitfield(UTF8.getBytes(flags));
|
||||
f.set(flag, value);
|
||||
this.dna.put(Seed.FLAGS, UTF8.String(f.getBytes()));
|
||||
}
|
||||
|
||||
public final void setFlagDirectConnect(final boolean value) { setFlag(FLAG_DIRECT_CONNECT, value); }
|
||||
public final void setFlagAcceptRemoteCrawl(final boolean value) { setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value); }
|
||||
public final void setFlagAcceptRemoteIndex(final boolean value) { setFlag(FLAG_ACCEPT_REMOTE_INDEX, value); }
|
||||
public final boolean getFlagDirectConnect() { return getFlag(0); }
|
||||
public final void setFlagDirectConnect(final boolean value) {
|
||||
setFlag(FLAG_DIRECT_CONNECT, value);
|
||||
}
|
||||
|
||||
public final void setFlagAcceptRemoteCrawl(final boolean value) {
|
||||
setFlag(FLAG_ACCEPT_REMOTE_CRAWL, value);
|
||||
}
|
||||
|
||||
public final void setFlagAcceptRemoteIndex(final boolean value) {
|
||||
setFlag(FLAG_ACCEPT_REMOTE_INDEX, value);
|
||||
}
|
||||
|
||||
public final boolean getFlagDirectConnect() {
|
||||
return getFlag(0);
|
||||
}
|
||||
|
||||
public final boolean getFlagAcceptRemoteCrawl() {
|
||||
//if (getVersion() < 0.300) return false;
|
||||
//if (getVersion() < 0.334) return true;
|
||||
return getFlag(1);
|
||||
}
|
||||
|
||||
public final boolean getFlagAcceptRemoteIndex() {
|
||||
//if (getVersion() < 0.335) return false;
|
||||
return getFlag(2);
|
||||
}
|
||||
|
||||
public final void setUnusedFlags() {
|
||||
for (int i = 4; i < 24; i++) { setFlag(i, true); }
|
||||
for ( int i = 4; i < 24; i++ ) {
|
||||
setFlag(i, true);
|
||||
}
|
||||
}
|
||||
|
||||
public final boolean isType(final String type) {
|
||||
return get(Seed.PEERTYPE, "").equals(type);
|
||||
}
|
||||
|
||||
public final boolean isVirgin() {
|
||||
return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_VIRGIN);
|
||||
}
|
||||
|
||||
public final boolean isJunior() {
|
||||
return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_JUNIOR);
|
||||
}
|
||||
|
||||
public final boolean isSenior() {
|
||||
return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_SENIOR);
|
||||
}
|
||||
|
||||
public final boolean isPrincipal() {
|
||||
return get(Seed.PEERTYPE, "").equals(Seed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
||||
public final boolean isPotential() {
|
||||
return isVirgin() || isJunior();
|
||||
}
|
||||
|
||||
public final boolean isActive() {
|
||||
return isSenior() || isPrincipal();
|
||||
}
|
||||
|
||||
public final boolean isOnline() {
|
||||
return isSenior() || isPrincipal();
|
||||
}
|
||||
|
||||
public final boolean isOnline(final String type) {
|
||||
return type.equals(Seed.PEERTYPE_SENIOR) || type.equals(Seed.PEERTYPE_PRINCIPAL);
|
||||
}
|
||||
|
@ -702,13 +839,19 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
String interval = null;
|
||||
while ( !gaps.isEmpty() ) {
|
||||
interval = gaps.remove(gaps.lastKey());
|
||||
if (random.nextBoolean()) break;
|
||||
if ( random.nextBoolean() ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( interval == null ) {
|
||||
return randomHash();
|
||||
}
|
||||
if (interval == null) return randomHash();
|
||||
|
||||
// find dht position and size of gap
|
||||
final long left = FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(0, 12)), null);
|
||||
final long right = FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(12)), null);
|
||||
final long left =
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(0, 12)), null);
|
||||
final long right =
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(interval.substring(12)), null);
|
||||
final long gap8 = FlatWordPartitionScheme.dhtDistance(left, right) >> 3; // 1/8 of a gap
|
||||
final long gapx = gap8 + (Math.abs(random.nextLong()) % (6 * gap8));
|
||||
final long gappos = (Long.MAX_VALUE - left >= gapx) ? left + gapx : (left - Long.MAX_VALUE) + gapx;
|
||||
|
@ -725,7 +868,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
combined[1] = randomHash[1];
|
||||
}
|
||||
// finally check if the hash is already known
|
||||
while (seedDB.hasConnected(combined) || seedDB.hasDisconnected(combined) || seedDB.hasPotential(combined)) {
|
||||
while ( seedDB.hasConnected(combined)
|
||||
|| seedDB.hasDisconnected(combined)
|
||||
|| seedDB.hasPotential(combined) ) {
|
||||
// if we are lucky then this loop will never run
|
||||
combined = randomHash();
|
||||
}
|
||||
|
@ -734,7 +879,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
private static TreeMap<Long, String> hashGaps(final SeedDB seedDB) {
|
||||
final TreeMap<Long, String> gaps = new TreeMap<Long, String>();
|
||||
if (seedDB == null) return gaps;
|
||||
if ( seedDB == null ) {
|
||||
return gaps;
|
||||
}
|
||||
|
||||
final Iterator<Seed> i = seedDB.seedsConnected(true, false, null, (float) 0.0);
|
||||
long l;
|
||||
|
@ -746,7 +893,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
first = s0;
|
||||
continue;
|
||||
}
|
||||
l = FlatWordPartitionScheme.dhtDistance(
|
||||
l =
|
||||
FlatWordPartitionScheme.dhtDistance(
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s0.hash), null),
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s1.hash), null));
|
||||
gaps.put(l, s0.hash + s1.hash);
|
||||
|
@ -754,7 +902,8 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
}
|
||||
// compute also the last gap
|
||||
if ( (first != null) && (s0 != null) ) {
|
||||
l = FlatWordPartitionScheme.dhtDistance(
|
||||
l =
|
||||
FlatWordPartitionScheme.dhtDistance(
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(s0.hash), null),
|
||||
FlatWordPartitionScheme.std.dhtPosition(ASCII.getBytes(first.hash), null));
|
||||
gaps.put(l, s0.hash + first.hash);
|
||||
|
@ -785,26 +934,44 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
public static byte[] randomHash() {
|
||||
final String hash =
|
||||
Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6) +
|
||||
Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong()))).substring(0, 6);
|
||||
Base64Order.enhancedCoder
|
||||
.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong())))
|
||||
.substring(0, 6)
|
||||
+ Base64Order.enhancedCoder
|
||||
.encode(Digest.encodeMD5Raw(Long.toString(random.nextLong())))
|
||||
.substring(0, 6);
|
||||
return ASCII.getBytes(hash);
|
||||
}
|
||||
|
||||
public static Seed genRemoteSeed(final String seedStr, final String key, final boolean ownSeed, final String patchIP) throws IOException {
|
||||
public static Seed genRemoteSeed(
|
||||
final String seedStr,
|
||||
final String key,
|
||||
final boolean ownSeed,
|
||||
final String patchIP) throws IOException {
|
||||
// this method is used to convert the external representation of a seed into a seed object
|
||||
// yacyCore.log.logFinest("genRemoteSeed: seedStr=" + seedStr + " key=" + key);
|
||||
|
||||
// check protocol and syntax of seed
|
||||
if (seedStr == null) throw new IOException("seedStr == null");
|
||||
if (seedStr.length() == 0) throw new IOException("seedStr.length() == 0");
|
||||
if ( seedStr == null ) {
|
||||
throw new IOException("seedStr == null");
|
||||
}
|
||||
if ( seedStr.length() == 0 ) {
|
||||
throw new IOException("seedStr.length() == 0");
|
||||
}
|
||||
final String seed = crypt.simpleDecode(seedStr, key);
|
||||
if (seed == null) throw new IOException("seed == null");
|
||||
if (seed.length() == 0) throw new IOException("seed.length() == 0");
|
||||
if ( seed == null ) {
|
||||
throw new IOException("seed == null");
|
||||
}
|
||||
if ( seed.length() == 0 ) {
|
||||
throw new IOException("seed.length() == 0");
|
||||
}
|
||||
|
||||
// extract hash
|
||||
final ConcurrentHashMap<String, String> dna = MapTools.string2map(seed, ",");
|
||||
final String hash = dna.remove(Seed.HASH);
|
||||
if (hash == null) throw new IOException("hash == null");
|
||||
if ( hash == null ) {
|
||||
throw new IOException("hash == null");
|
||||
}
|
||||
final Seed resultSeed = new Seed(hash, dna);
|
||||
|
||||
// check semantics of content
|
||||
|
@ -816,7 +983,9 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
resultSeed.setIP(patchIP);
|
||||
testResult = resultSeed.isProper(ownSeed);
|
||||
}
|
||||
if (testResult != null) throw new IOException("seed is not proper (" + testResult + "): " + resultSeed);
|
||||
if ( testResult != null ) {
|
||||
throw new IOException("seed is not proper (" + testResult + "): " + resultSeed);
|
||||
}
|
||||
|
||||
// seed ok
|
||||
return resultSeed;
|
||||
|
@ -827,36 +996,52 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
// checks if everything is ok with that seed
|
||||
|
||||
// check hash
|
||||
if (this.hash == null) return "hash is null";
|
||||
if (this.hash.length() != Word.commonHashLength) return "wrong hash length (" + this.hash.length() + ")";
|
||||
if ( this.hash == null ) {
|
||||
return "hash is null";
|
||||
}
|
||||
if ( this.hash.length() != Word.commonHashLength ) {
|
||||
return "wrong hash length (" + this.hash.length() + ")";
|
||||
}
|
||||
|
||||
// name
|
||||
final String peerName = this.dna.get(Seed.NAME);
|
||||
if (peerName == null) return "no peer name given";
|
||||
if ( peerName == null ) {
|
||||
return "no peer name given";
|
||||
}
|
||||
this.dna.put(Seed.NAME, checkPeerName(peerName));
|
||||
|
||||
// type
|
||||
final String peerType = getPeerType();
|
||||
if ((peerType == null) ||
|
||||
!(peerType.equals(Seed.PEERTYPE_VIRGIN) || peerType.equals(Seed.PEERTYPE_JUNIOR)
|
||||
|| peerType.equals(Seed.PEERTYPE_SENIOR) || peerType.equals(Seed.PEERTYPE_PRINCIPAL)))
|
||||
if ( (peerType == null)
|
||||
|| !(peerType.equals(Seed.PEERTYPE_VIRGIN)
|
||||
|| peerType.equals(Seed.PEERTYPE_JUNIOR)
|
||||
|| peerType.equals(Seed.PEERTYPE_SENIOR) || peerType.equals(Seed.PEERTYPE_PRINCIPAL)) ) {
|
||||
return "invalid peerType '" + peerType + "'";
|
||||
}
|
||||
|
||||
// check IP
|
||||
if ( !checkOwnIP ) {
|
||||
// checking of IP is omitted if we read the own seed file
|
||||
final String ipCheck = isProperIP(getIP());
|
||||
if (ipCheck != null) return ipCheck;
|
||||
if ( ipCheck != null ) {
|
||||
return ipCheck;
|
||||
}
|
||||
}
|
||||
|
||||
// seedURL
|
||||
final String seedURL = this.dna.get(SEEDLISTURL);
|
||||
if ( seedURL != null && seedURL.length() > 0 ) {
|
||||
if (!seedURL.startsWith("http://") && !seedURL.startsWith("https://")) return "wrong protocol for seedURL";
|
||||
if ( !seedURL.startsWith("http://") && !seedURL.startsWith("https://") ) {
|
||||
return "wrong protocol for seedURL";
|
||||
}
|
||||
try {
|
||||
final URL url = new URL(seedURL);
|
||||
final String host = url.getHost();
|
||||
if (host.equals("localhost") || host.startsWith("127.") || (host.startsWith("0:0:0:0:0:0:0:1"))) return "seedURL in localhost rejected";
|
||||
if ( host.equals("localhost")
|
||||
|| host.startsWith("127.")
|
||||
|| (host.startsWith("0:0:0:0:0:0:0:1")) ) {
|
||||
return "seedURL in localhost rejected";
|
||||
}
|
||||
} catch ( final MalformedURLException e ) {
|
||||
return "seedURL malformed";
|
||||
}
|
||||
|
@ -866,12 +1051,20 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
|
||||
public static final String isProperIP(final String ipString) {
|
||||
// returns null if ipString is proper, a string with the cause otherwise
|
||||
if (ipString == null) return ipString + " -> IP is null";
|
||||
if (ipString.length() > 0 && ipString.length() < 8) return ipString + " -> IP is too short: ";
|
||||
if (Switchboard.getSwitchboard().isAllIPMode()) return null;
|
||||
if ( ipString == null ) {
|
||||
return ipString + " -> IP is null";
|
||||
}
|
||||
if ( ipString.length() > 0 && ipString.length() < 8 ) {
|
||||
return ipString + " -> IP is too short: ";
|
||||
}
|
||||
if ( Switchboard.getSwitchboard().isAllIPMode() ) {
|
||||
return null;
|
||||
}
|
||||
final boolean islocal = Domains.isLocal(ipString, null);
|
||||
//if (islocal && Switchboard.getSwitchboard().isGlobalMode()) return ipString + " - local IP for global mode rejected";
|
||||
if (!islocal && Switchboard.getSwitchboard().isIntranetMode()) return ipString + " - global IP for intranet mode rejected";
|
||||
if ( !islocal && Switchboard.getSwitchboard().isIntranetMode() ) {
|
||||
return ipString + " - global IP for intranet mode rejected";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -890,7 +1083,11 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
final String b = crypt.simpleEncode(r, key, 'b');
|
||||
// the compressed string may be longer that the uncompressed if there is too much overhead for compression meta-info
|
||||
// take simply that string that is shorter
|
||||
if (b.length() < z.length()) return b; else return z;
|
||||
if ( b.length() < z.length() ) {
|
||||
return b;
|
||||
} else {
|
||||
return z;
|
||||
}
|
||||
}
|
||||
|
||||
public final void save(final File f) throws IOException {
|
||||
|
@ -923,8 +1120,12 @@ public class Seed implements Cloneable, Comparable<Seed>, Comparator<Seed> {
|
|||
// TODO Auto-generated method stub
|
||||
final int o1 = hashCode();
|
||||
final int o2 = arg0.hashCode();
|
||||
if (o1 > o2) return 1;
|
||||
if (o2 > o1) return -1;
|
||||
if ( o1 > o2 ) {
|
||||
return 1;
|
||||
}
|
||||
if ( o2 > o1 ) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
0
source/net/yacy/peers/dht/Dispatcher.java
Executable file → Normal file
0
source/net/yacy/peers/dht/Dispatcher.java
Executable file → Normal file
0
source/net/yacy/peers/dht/FlatWordPartitionScheme.java
Executable file → Normal file
0
source/net/yacy/peers/dht/FlatWordPartitionScheme.java
Executable file → Normal file
0
source/net/yacy/peers/dht/PartitionScheme.java
Executable file → Normal file
0
source/net/yacy/peers/dht/PartitionScheme.java
Executable file → Normal file
0
source/net/yacy/peers/dht/PeerSelection.java
Executable file → Normal file
0
source/net/yacy/peers/dht/PeerSelection.java
Executable file → Normal file
0
source/net/yacy/peers/dht/VerticalWordPartitionScheme.java
Executable file → Normal file
0
source/net/yacy/peers/dht/VerticalWordPartitionScheme.java
Executable file → Normal file
|
@ -29,13 +29,14 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
|
@ -91,22 +92,22 @@ public class Blacklist {
|
|||
}));
|
||||
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
|
||||
private File blacklistRootPath = null;
|
||||
private final Map<String, HandleSet> cachedUrlHashs;
|
||||
private final Map<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
|
||||
private final Map<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
|
||||
private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
|
||||
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
|
||||
private final ConcurrentMap<String, Map<String, List<String>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
|
||||
|
||||
public Blacklist(final File rootPath) {
|
||||
|
||||
setRootPath(rootPath);
|
||||
|
||||
// prepare the data structure
|
||||
this.hostpaths_matchable = new HashMap<String, Map<String, List<String>>>();
|
||||
this.hostpaths_notmatchable = new HashMap<String, Map<String, List<String>>>();
|
||||
this.cachedUrlHashs = new HashMap<String, HandleSet>();
|
||||
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
|
||||
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<String>>>();
|
||||
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
|
||||
|
||||
for (final String blacklistType : BLACKLIST_TYPES) {
|
||||
this.hostpaths_matchable.put(blacklistType, new HashMap<String, List<String>>());
|
||||
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, List<String>>());
|
||||
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
|
||||
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<String>>());
|
||||
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ import de.anomic.crawler.retrieval.HTTPLoader;
|
|||
import de.anomic.crawler.retrieval.Request;
|
||||
import de.anomic.crawler.retrieval.Response;
|
||||
import de.anomic.crawler.retrieval.SMBLoader;
|
||||
import de.anomic.crawler.ZURL.FailCategory;
|
||||
import de.anomic.http.client.Cache;
|
||||
|
||||
public final class LoaderDispatcher {
|
||||
|
@ -137,7 +138,7 @@ public final class LoaderDispatcher {
|
|||
|
||||
public void load(final DigestURI url, final CacheStrategy cacheStratgy, final int maxFileSize, final File targetFile) throws IOException {
|
||||
|
||||
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, false).getContent();
|
||||
final byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, true).getContent();
|
||||
if (b == null) throw new IOException("load == null");
|
||||
final File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
|
||||
|
||||
|
@ -190,6 +191,12 @@ public final class LoaderDispatcher {
|
|||
final String protocol = url.getProtocol();
|
||||
final String host = url.getHost();
|
||||
|
||||
// check if url is in blacklist
|
||||
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
|
||||
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
|
||||
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
|
||||
}
|
||||
|
||||
// check if we have the page in the cache
|
||||
final CrawlProfile crawlProfile = this.sb.crawler.getActive(UTF8.getBytes(request.profileHandle()));
|
||||
if (crawlProfile != null && cacheStrategy != CacheStrategy.NOCACHE) {
|
||||
|
@ -324,7 +331,7 @@ public final class LoaderDispatcher {
|
|||
*/
|
||||
public byte[] loadContent(final Request request, final CacheStrategy cacheStrategy) throws IOException {
|
||||
// try to download the resource using the loader
|
||||
final Response entry = load(request, cacheStrategy, false);
|
||||
final Response entry = load(request, cacheStrategy, true);
|
||||
if (entry == null) return null; // not found in web
|
||||
|
||||
// read resource body (if it is there)
|
||||
|
@ -334,7 +341,7 @@ public final class LoaderDispatcher {
|
|||
public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int timeout, final int maxFileSize) throws IOException, Parser.Failure {
|
||||
|
||||
// load resource
|
||||
final Response response = load(request, cacheStrategy, maxFileSize, false);
|
||||
final Response response = load(request, cacheStrategy, maxFileSize, true);
|
||||
final DigestURI url = request.url();
|
||||
if (response == null) throw new IOException("no Response for url " + url);
|
||||
|
||||
|
@ -347,7 +354,7 @@ public final class LoaderDispatcher {
|
|||
|
||||
public ContentScraper parseResource(final DigestURI location, final CacheStrategy cachePolicy) throws IOException {
|
||||
// load page
|
||||
final Response r = this.load(request(location, true, false), cachePolicy, false);
|
||||
final Response r = this.load(request(location, true, false), cachePolicy, true);
|
||||
final byte[] page = (r == null) ? null : r.getContent();
|
||||
if (page == null) throw new IOException("no response from url " + location.toString());
|
||||
|
||||
|
@ -366,7 +373,7 @@ public final class LoaderDispatcher {
|
|||
* @throws IOException
|
||||
*/
|
||||
public final Map<MultiProtocolURI, String> loadLinks(final DigestURI url, final CacheStrategy cacheStrategy) throws IOException {
|
||||
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, false);
|
||||
final Response response = load(request(url, true, false), cacheStrategy, Integer.MAX_VALUE, true);
|
||||
if (response == null) throw new IOException("response == null");
|
||||
final ResponseHeader responseHeader = response.getResponseHeader();
|
||||
if (response.getContent() == null) throw new IOException("resource == null");
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -278,8 +278,8 @@ public final class SwitchboardConstants {
|
|||
|
||||
public static final String CLUSTER_MODE = "cluster.mode";
|
||||
public static final String CLUSTER_MODE_PUBLIC_CLUSTER = "publiccluster";
|
||||
public static final String CLUSTER_MODE_PRIVATE_CLUSTER = "privatecluster";
|
||||
public static final String CLUSTER_MODE_PUBLIC_PEER = "publicpeer";
|
||||
public static final String CLUSTER_MODE_PRIVATE_PEER = "privatepeer";
|
||||
public static final String CLUSTER_PEERS_IPPORT = "cluster.peers.ipport";
|
||||
|
||||
public static final String DHT_BURST_ROBINSON = "network.unit.dht.burst.robinson";
|
||||
|
|
|
@ -93,6 +93,7 @@ public final class RWIProcess extends Thread
|
|||
private final ReferenceOrder order;
|
||||
private final long startTime;
|
||||
private boolean addRunning;
|
||||
private boolean fresh;
|
||||
|
||||
// navigation scores
|
||||
private final ScoreMap<String> hostNavigator; // a counter for the appearance of the host hash
|
||||
|
@ -136,6 +137,7 @@ public final class RWIProcess extends Thread
|
|||
this.maxExpectedRemoteReferences = new AtomicInteger(0);
|
||||
this.expectedRemoteReferences = new AtomicInteger(0);
|
||||
this.receivedRemoteReferences = new AtomicInteger(0);
|
||||
this.fresh = true;
|
||||
}
|
||||
|
||||
public void addExpectedRemoteReferences(int x) {
|
||||
|
@ -388,10 +390,11 @@ public final class RWIProcess extends Thread
|
|||
|
||||
public void oneFeederStarted() {
|
||||
this.feeders.addAndGet(1);
|
||||
this.fresh = false;
|
||||
}
|
||||
|
||||
public boolean feedingIsFinished() {
|
||||
return this.feeders.get() <= 0;
|
||||
return !this.fresh && this.feeders.get() <= 0;
|
||||
}
|
||||
|
||||
private boolean testFlags(final WordReference ientry) {
|
||||
|
|
|
@ -27,6 +27,7 @@ package net.yacy.search.snippet;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -48,7 +49,10 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
|
|||
import net.yacy.kelondro.logging.Log;
|
||||
import net.yacy.kelondro.order.Base64Order;
|
||||
import net.yacy.kelondro.util.ByteArray;
|
||||
import net.yacy.repository.Blacklist;
|
||||
import net.yacy.search.Switchboard;
|
||||
import de.anomic.crawler.retrieval.Request;
|
||||
import de.anomic.crawler.ZURL.FailCategory;
|
||||
|
||||
|
||||
public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaSnippet> {
|
||||
|
@ -165,6 +169,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|||
entry = i.next();
|
||||
url = new DigestURI(entry.getKey());
|
||||
desc = entry.getValue();
|
||||
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
|
||||
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
|
||||
removeAppearanceHashes(desc, queryhashes).size();
|
||||
if (ranking < 2 * queryhashes.size()) {
|
||||
|
@ -189,6 +194,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|||
ientry = i.next();
|
||||
url = new DigestURI(ientry.url());
|
||||
final String u = url.toString();
|
||||
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
|
||||
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
|
||||
if (ientry.height() > 0 && ientry.height() < 32) continue;
|
||||
if (ientry.width() > 0 && ientry.width() < 32) continue;
|
||||
|
@ -230,4 +236,27 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
|
|||
return remaininghashes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks wether given URL is in blacklist for given blacklist type
|
||||
*
|
||||
* @param url The URL to check
|
||||
* @param blacklistType Type of blacklist (see class Blacklist, BLACKLIST_FOO)
|
||||
* @return isBlacklisted Wether the given URL is blacklisted
|
||||
*/
|
||||
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) {
|
||||
// Default is not blacklisted
|
||||
boolean isBlacklisted = false;
|
||||
|
||||
// check if url is in blacklist
|
||||
if (Switchboard.urlBlacklist.isListed(blacklistType, url.getHost().toLowerCase(), url.getFile())) {
|
||||
Switchboard.getSwitchboard().crawlQueues.errorURL.push(new Request(url, null), Switchboard.getSwitchboard().peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
|
||||
Log.logFine("snippet fetch", "MEDIA-SNIPPET Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
|
||||
isBlacklisted = true;
|
||||
}
|
||||
|
||||
// Return result
|
||||
return isBlacklisted;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user