yacy_search_server/source/de/anomic/yacy/yacyPeerActions.java
orbiter d2ba1fd2ab major step forward to network switching (target is easy switch to intranet or other networks .. and back)
This change is inspired by the need to see a network connected to the index it creates in an indexing team.
It is not possible to divide the network and the index. Therefore all control files for the network were moved to the network within the INDEX/<network-name> subfolder.
The remaining YACYDB is superfluous and can be deleted.
The yacyDB and yacyNews data structures are now part of plasmaWordIndex. Therefore all methods, using static access to yacySeedDB had to be rewritten. A special problem had been all the port forwarding methods which had been tightly mixed with seed construction. It was not possible to move the port forwarding functions to the place, meaning and usage of plasmaWordIndex. Therefore the port forwarding had been deleted (I guess nobody used it and it can be simulated by methods outside of YaCy).
The mySeed.txt is automatically moved to the current network position. A new effect causes that every network will create a different local seed file, which is ok, since the seed identifies the peer only against the network (it is the purpose of the seed hash to give a peer a location within the DHT).
No other functional change has been made. The next steps to enable network switching are:
- shift of crawler tables from PLASMADB into the network (crawls are also network-specific)
- possibly shift of plasmaWordIndex code into yacy package (index management is network-specific)
- servlet to switch networks 

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4765 6c8d7289-2bf4-0310-a012-ef5d649a1542
2008-05-05 23:13:47 +00:00

398 lines
20 KiB
Java

// yacyPeerActions.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this softare or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notive above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.yacy;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlNURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverDate;
import de.anomic.tools.nxTools;
public class yacyPeerActions {
// seed database that all connect/disconnect/ping operations of this class write to
private yacySeedDB seedDB;
// switchboard; source of configuration values and of the statistics published in the own seed
private plasmaSwitchboard sb;
// listeners registered through deploy(); notified on peer arrival, departure and ping
private HashSet<yacyPeerAction> actions;
// user agent strings keyed by the IP string passed to setUserAgent()
private HashMap<String, String> userAgents;
// statistics counter; initialised to 0 and not incremented anywhere within this class
public long juniorConnects;
// statistics counter; incremented in connectPeer() for every NEW seed of type senior
public long seniorConnects;
// statistics counter; incremented in connectPeer() for every NEW seed of type principal
public long principalConnects;
// statistics counter; incremented in disconnectPeer() when the seed was not yet in the disconnected table
public long disconnects;
// config value "bootstrapLoadTimeout" (default 6000); passed to HttpClient.wget and compared
// against the measured load time in milliseconds in loadSeedLists()
private int bootstrapLoadTimeout;
public yacyPeerActions(yacySeedDB seedDB, plasmaSwitchboard switchboard) {
this.seedDB = seedDB;
this.sb = switchboard;
this.actions = new HashSet<yacyPeerAction>();
this.userAgents = new HashMap<String, String>();
this.juniorConnects = 0;
this.seniorConnects = 0;
this.principalConnects = 0;
this.disconnects = 0;
this.bootstrapLoadTimeout = (int) switchboard.getConfigLong("bootstrapLoadTimeout", 6000);
}
/**
 * Registers an action that is notified about peer arrivals, departures
 * and pings. The actions are kept in a set, so adding an equal action
 * twice has no further effect.
 *
 * @param action the action to register
 */
public void deploy(yacyPeerAction action) {
    this.actions.add(action);
}
/**
 * Refreshes the own seed with the current state of this peer: name, port,
 * indexing/query speed, uptime, the various size counters, version, flags
 * and the last-seen time. Also updates the speed statistics
 * (totalPPM, totalQPM) and the speed-measurement baseline on the switchboard.
 */
public void updateMySeed() {
    // a peer still carrying the placeholder name "anomic" gets a generated name
    if (sb.getConfig("peerName", "anomic").equals("anomic")) {
        sb.setConfig("peerName", yacySeed.makeDefaultPeerName());
    }

    final yacySeed self = seedDB.mySeed();

    // identity
    self.put(yacySeed.NAME, sb.getConfig("peerName", "nameless"));
    final String portConfig = sb.getConfig("port", "8080");
    self.put(yacySeed.PORT, Integer.toString(serverCore.getPortNr(portConfig)));

    // time base (seconds since server start) and differences to the last baseline
    final long uptime = (System.currentTimeMillis() - serverCore.startupTime) / 1000;
    final long uptimediff = uptime - sb.lastseedcheckuptime;
    final long indexedcdiff = sb.indexedPages - sb.lastindexedPages;

    // start a new measurement interval if there is no baseline yet (-1),
    // the difference is not positive, or more than 300 seconds have passed
    final boolean newInterval = (sb.lastseedcheckuptime == -1) || (uptimediff <= 0) || (uptimediff > 300);
    if (newInterval) {
        sb.lastseedcheckuptime = uptime;
        sb.lastindexedPages = sb.indexedPages;
        sb.lastrequestedQueries = sb.requestedQueries;
    }

    // indexing speed: overall pages per minute on the switchboard,
    // pages per minute within the current interval in the seed
    sb.totalPPM = (int) (sb.indexedPages * 60 / Math.max(uptime, 1));
    final float pagesInInterval = Math.max((float) indexedcdiff, 0f);
    final float secondsInInterval = Math.max((float) uptimediff, 1f);
    self.put(yacySeed.ISPEED, Long.toString(Math.round(pagesInInterval * 60f / secondsInInterval)));

    // query speed: overall queries per minute, published as RSPEED
    sb.totalQPM = sb.requestedQueries * 60d / Math.max((double) uptime, 1d);
    self.put(yacySeed.RSPEED, Double.toString(sb.totalQPM));

    // uptime in minutes
    self.put(yacySeed.UPTIME, Long.toString(uptime / 60));

    // size counters
    self.put(yacySeed.LCOUNT, Integer.toString(sb.wordIndex.countURL()));           // URLs in the word index
    self.put(yacySeed.NCOUNT, Integer.toString(sb.crawlQueues.noticeURL.size()));   // noticed, not yet loaded URLs
    self.put(yacySeed.RCOUNT, Integer.toString(sb.crawlQueues.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT))); // size of the LIMIT stack
    self.put(yacySeed.ICOUNT, Integer.toString(sb.wordIndex.size()));               // size of the word index
    self.put(yacySeed.SCOUNT, Integer.toString(seedDB.sizeConnected()));            // connected seeds

    // known peers (connected + disconnected + potential) relative to the uptime.
    // NOTE(review): the (int) cast binds to the rate before the "* 100", so the
    // published value is truncated to a whole number - confirm this is intended.
    self.put(yacySeed.CCOUNT, Double.toString(((int) ((seedDB.sizeConnected() + seedDB.sizeDisconnected() + seedDB.sizePotential()) * 60.0 / (uptime + 1.01)) * 100) / 100.0));

    self.put(yacySeed.VERSION, sb.getConfig("version", ""));

    // principal peers additionally publish the location of their seed list
    final boolean principal = self.get(yacySeed.PEERTYPE, "").equals(yacySeed.PEERTYPE_PRINCIPAL);
    if (principal) {
        self.put("seedURL", sb.getConfig("seedURL", ""));
    }

    // flags and time stamps
    self.setFlagDirectConnect(true);
    self.setLastSeenUTC();
    self.put(yacySeed.UTC, serverDate.UTCDiffString());
    self.setFlagAcceptRemoteCrawl(sb.getConfig("crawlResponse", "").equals("true"));
    self.setFlagAcceptRemoteIndex(sb.getConfig("allowReceiveIndex", "").equals("true"));
}
/**
 * Writes the own seed to the seed file returned by the switchboard
 * ({@code sb.getOwnSeedFile()}).
 * Saving is best-effort: an I/O failure does not propagate to the caller,
 * but it is logged as a warning so that a seed file that cannot be written
 * does not go unnoticed.
 */
public void saveMySeed() {
    try {
        seedDB.mySeed().save(sb.getOwnSeedFile());
    } catch (IOException e) {
        // keep the best-effort contract, but do not swallow the failure silently
        yacyCore.log.logWarning("could not save own seed file: " + e.getMessage());
    }
}
/**
 * Bootstraps the seed database from the configured seed-list URLs.
 * <p>
 * The config keys "network.unit.bootstrap.seedlist0", "...1", ... are read
 * until the first empty entry or until the current thread is interrupted.
 * Only http:// and https:// URLs are processed. For each URL a HEAD request
 * is made first; the list is skipped if the request fails, if no
 * last-modified information is present, or if the list is older than one day
 * and at least one other list has already been accepted. Every proper seed
 * in an accepted list, except the own seed, is handed to
 * {@link #connectPeer(yacySeed, boolean)} as an indirect connection.
 * <p>
 * Failures of a single list are logged and do not stop processing of the
 * remaining lists.
 */
public void loadSeedLists() {
    // uses the superseed to initialize the database with known seeds
    final int sc = seedDB.sizeConnected();
    yacyCore.log.logInfo("BOOTSTRAP: " + sc + " seeds known from previous run");

    // - use the superseed to further fill up the seedDB
    int ssc = 0; // number of seed lists that have been accepted for loading
    int c = 0;   // index of the next seed-list config entry
    while (!Thread.currentThread().isInterrupted()) {
        final String seedListFileURL = sb.getConfig("network.unit.bootstrap.seedlist" + c, "");
        if (seedListFileURL.length() == 0) break;
        c++;
        if (!seedListFileURL.startsWith("http://") && !seedListFileURL.startsWith("https://")) {
            // entries with other protocols are ignored
            continue;
        }

        // load the seed list
        try {
            final httpHeader reqHeader = new httpHeader();
            reqHeader.put(httpHeader.PRAGMA, "no-cache");
            reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache");
            final yacyURL url = new yacyURL(seedListFileURL, null);

            // HEAD request first: tells availability and age without loading the list
            final long start = System.currentTimeMillis();
            final httpHeader header = HttpClient.whead(url.toString(), reqHeader);
            final long loadtime = System.currentTimeMillis() - start;

            if (header == null) {
                if (loadtime > this.bootstrapLoadTimeout) {
                    yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, time-out after " + loadtime + " milliseconds");
                } else {
                    yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not available, no content");
                }
            } else if (header.lastModified() == null) {
                yacyCore.log.logWarning("BOOTSTRAP: seed-list URL " + seedListFileURL + " not usable, last-modified is missing");
            } else if ((header.age() > 86400000) && (ssc > 0)) {
                // older than one day is only tolerated for the first accepted list
                yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
            } else {
                ssc++;
                final byte[] content = HttpClient.wget(url.toString(), reqHeader, null, bootstrapLoadTimeout);
                final ArrayList<String> seedList = nxTools.strings(content, "UTF-8");
                int lc = 0; // seeds from this list that were actually connected
                for (final String seedStr : seedList) {
                    final yacySeed ys = yacySeed.genRemoteSeed(seedStr, null, true);
                    if ((ys == null) || (ys.isProper() != null)) continue;
                    // skip our own seed; hashes are Strings and must be compared
                    // by value, not by reference
                    if (seedDB.mySeedIsDefined() && ys.hash.equals(seedDB.mySeed().hash)) continue;
                    if (connectPeer(ys, false)) lc++;
                }
                yacyCore.log.logInfo("BOOTSTRAP: " + lc + " seeds from seed-list URL " + seedListFileURL + ", AGE=" + (header.age() / 3600000) + "h");
            }
        } catch (IOException e) {
            // this is when wget fails, commonly because of timeout
            yacyCore.log.logWarning("BOOTSTRAP: failed (1) to load seeds from seed-list URL " + seedListFileURL + ": " + e.getMessage());
        } catch (Exception e) {
            // this is when wget fails; may be because of missing internet connection
            yacyCore.log.logSevere("BOOTSTRAP: failed (2) to load seeds from seed-list URL " + seedListFileURL + ": " + e.getMessage(), e);
        }
    }
    yacyCore.log.logInfo("BOOTSTRAP: " + (seedDB.sizeConnected() - sc) + " new seeds while bootstraping.");
}
/**
 * Validates a remote peer's seed and, if accepted, stores it in the
 * connected table of the seed database.
 * <p>
 * The seed is rejected (return value false) when
 * it is null, it is not proper (isProper() reports an error), it carries
 * our own hash, its peer type is virgin or junior, another seed with a
 * different hash is already known for the same IP and port, its last-seen
 * time differs from now by more than 24 hours, or - for indirect
 * information only - it is older than what is already known about the peer
 * (disconnection time resp. last-seen time of the connected seed).
 * <p>
 * Side effects on an accepted or partially processed seed: the last-seen
 * time and the direct-connect flag of the given seed may be rewritten,
 * yacyVersion.latestRelease is raised if the seed reports a newer version,
 * and seniorConnects / principalConnects are incremented for seeds that
 * were not known before.
 *
 * @param seed   the remote seed to store
 * @param direct true if we had direct contact with the peer; false if the
 *               seed was only announced to us (e.g. from a seed list)
 * @return true whenever the seed was written to the connected table; this
 *         covers new peers, updates of already connected peers and peers
 *         returning from the disconnected table
 */
private synchronized boolean connectPeer(yacySeed seed, boolean direct) {
// store a remote peer's seed
// returns true if the peer is new and previously unknown
// (note: the code below also returns true for updated and returning peers)
if (seed == null) {
yacyCore.log.logSevere("connect: WRONG seed (NULL)");
return false;
}
// isProper() returns an error description, or null if the seed is fine
final String error = seed.isProper();
if (error != null) {
yacyCore.log.logSevere("connect: WRONG seed (" + seed.getName() + "/" + seed.hash + "): " + error);
return false;
}
// never store our own seed as a remote peer
if ((this.seedDB.mySeedIsDefined()) && (seed.hash.equals(this.seedDB.mySeed().hash))) {
yacyCore.log.logInfo("connect: SELF reference " + seed.getPublicAddress());
return false;
}
// a seed without peer type is treated as virgin
final String peerType = seed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN);
if ((peerType.equals(yacySeed.PEERTYPE_VIRGIN)) || (peerType.equals(yacySeed.PEERTYPE_JUNIOR))) {
// reject unqualified seeds
yacyCore.log.logFine("connect: rejecting NOT QUALIFIED " + peerType + " seed " + seed.getName());
return false;
}
// look for an already known seed at the same address
final yacySeed doubleSeed = this.seedDB.lookupByIP(seed.getInetAddress(), true, false, false);
if ((doubleSeed != null) && (doubleSeed.getPort() == seed.getPort()) && (!(doubleSeed.hash.equals(seed.hash)))) {
// a user frauds with his peer different peer hashes
yacyCore.log.logFine("connect: rejecting FRAUD (double hashes " + doubleSeed.hash + "/" + seed.hash + " on same port " + seed.getPort() + ") peer " + seed.getName());
return false;
}
// a valid LastSeen entry is expected to be exactly 14 characters long
if (seed.get(yacySeed.LASTSEEN, "").length() != 14) {
// hack for peers that do not have a LastSeen date
seed.setLastSeenUTC();
yacyCore.log.logFine("connect: reset wrong date (" + seed.getName() + "/" + seed.hash + ")");
}
// connection time
final long nowUTC0Time = System.currentTimeMillis(); // is better to have this value in a variable for debugging
long ctimeUTC0 = seed.getLastSeenUTC();
if (ctimeUTC0 > nowUTC0Time) {
// the peer is future-dated, correct it
seed.setLastSeenUTC();
ctimeUTC0 = nowUTC0Time;
assert (seed.getLastSeenUTC() - ctimeUTC0 < 100);
}
// 60 * 60 * 24 * 1000 ms = one day
if (Math.abs(nowUTC0Time - ctimeUTC0) > 60 * 60 * 24 * 1000) {
// the new connection is out-of-age, we reject the connection
yacyCore.log.logFine("connect: rejecting out-dated peer '" + seed.getName() + "' from " + seed.getPublicAddress() + "; nowUTC0=" + nowUTC0Time + ", seedUTC0=" + ctimeUTC0 + ", TimeDiff=" + serverDate.formatInterval(Math.abs(nowUTC0Time - ctimeUTC0)));
return false;
}
// disconnection time
long dtimeUTC0;
final yacySeed disconnectedSeed = seedDB.getDisconnected(seed.hash);
if (disconnectedSeed == null) {
dtimeUTC0 = 0; // never disconnected: virtually disconnected maximum time ago
} else {
// "dct" is the disconnection time stamp written by disconnectPeer()
dtimeUTC0 = disconnectedSeed.getLong("dct", 0);
}
if (direct) {
// remember the moment
// Date applies the local UTC offset, which is wrong
// we correct that by subtracting the local offset and adding
// the remote offset.
seed.setLastSeenUTC();
seed.setFlagDirectConnect(true);
} else {
// set connection flag
if (Math.abs(nowUTC0Time - ctimeUTC0) > 120000) seed.setFlagDirectConnect(false); // 2 minutes
}
// update latest version number
if (seed.getVersion() > yacyVersion.latestRelease) yacyVersion.latestRelease = seed.getVersion();
// prepare to update
if (disconnectedSeed != null) {
// if the indirect connect aims to announce a peer that we know
// has been disconnected then we compare the dates:
// if the new peer has a LastSeen date, and that date is before
// the disconnection date, then we ignore the new peer
if (!direct) {
if (ctimeUTC0 < dtimeUTC0) {
// the disconnection was later, we reject the connection
yacyCore.log.logFine("connect: rejecting disconnected peer '" + seed.getName() + "' from " + seed.getPublicAddress());
return false;
}
}
// this is a return of a lost peer
yacyCore.log.logFine("connect: returned KNOWN " + peerType + " peer '" + seed.getName() + "' from " + seed.getPublicAddress());
this.seedDB.addConnected(seed);
return true;
} else {
final yacySeed connectedSeed = this.seedDB.getConnected(seed.hash);
if (connectedSeed != null) {
// the seed is known: this is an update
try {
// if the old LastSeen date is later then the other
// info, then we reject the info
if ((ctimeUTC0 < (connectedSeed.getLastSeenUTC())) && (!direct)) {
yacyCore.log.logFine("connect: rejecting old info about peer '" + seed.getName() + "'");
return false;
}
/*if (connectedSeed.getName() != seed.getName()) {
// TODO: update seed name lookup cache
}*/
} catch (NumberFormatException e) {
yacyCore.log.logFine("connect: rejecting wrong peer '" + seed.getName() + "' from " + seed.getPublicAddress() + ". Cause: " + e.getMessage());
return false;
}
yacyCore.log.logFine("connect: updated KNOWN " + ((direct) ? "direct " : "") + peerType + " peer '" + seed.getName() + "' from " + seed.getPublicAddress());
seedDB.addConnected(seed);
return true;
} else {
// the seed is new
if ((seedDB.mySeedIsDefined()) && (seed.get(yacySeed.IP, "127.0.0.1").equals(this.seedDB.mySeed().get(yacySeed.IP, "127.0.0.1")))) {
// seed from the same IP as the calling client: can be
// the case if there runs another one over a NAT
yacyCore.log.logFine("connect: saved NEW seed (myself IP) " + seed.getPublicAddress());
} else {
// completely new seed
yacyCore.log.logFine("connect: saved NEW " + peerType + " peer '" + seed.getName() + "' from " + seed.getPublicAddress());
}
// only seeds that were unknown so far are counted in the statistics
if (peerType.equals(yacySeed.PEERTYPE_SENIOR))
this.seniorConnects++; // update statistics
if (peerType.equals(yacySeed.PEERTYPE_PRINCIPAL))
this.principalConnects++; // update statistics
this.seedDB.addConnected(seed);
return true;
}
}
}
/**
 * Moves a peer that could not be contacted into the disconnected table.
 * The current time is stored in the seed under the key "dct"; the
 * disconnects counter is only increased if the peer was not already
 * listed as disconnected.
 *
 * @param seed  the seed of the unreachable peer
 * @param cause description of the failure, used for logging only
 */
private final void disconnectPeer(yacySeed seed, String cause) {
    // we do this if we did not get contact with the other peer
    final String peerType = seed.get(yacySeed.PEERTYPE, yacySeed.PEERTYPE_VIRGIN);
    yacyCore.log.logFine("connect: no contact to a " + peerType + " peer '" + seed.getName() + "' at " + seed.getPublicAddress() + ". Cause: " + cause);

    synchronized (seedDB) {
        final boolean alreadyDisconnected = seedDB.hasDisconnected(seed.hash);
        if (!alreadyDisconnected) {
            disconnects++;
        }
        // remember the moment of disconnection and update the stored info
        final String disconnectTime = Long.toString(System.currentTimeMillis());
        seed.put("dct", disconnectTime);
        seedDB.addDisconnected(seed);
    }
}
/**
 * Handles the arrival of a peer: tries to store its seed and, if that
 * succeeded, notifies all deployed actions.
 *
 * @param peer   the seed of the arriving peer; null is ignored
 * @param direct true if we had direct contact with the peer
 * @return true if the seed was accepted by connectPeer, false otherwise
 *         (including a null peer)
 */
public boolean peerArrival(yacySeed peer, boolean direct) {
    if (peer == null) {
        return false;
    }
    if (!connectPeer(peer, direct)) {
        // rejected seeds do not trigger any action
        return false;
    }
    for (final yacyPeerAction action : actions) {
        action.processPeerArrival(peer, direct);
    }
    return true;
}
/**
 * Handles the departure of a peer: the seed is moved to the disconnected
 * table and every deployed action is notified.
 *
 * @param peer  the seed of the departing peer; null is ignored
 * @param cause description of why the peer is considered gone
 */
public void peerDeparture(yacySeed peer, String cause) {
    if (peer != null) {
        disconnectPeer(peer, cause);
        // notify all deployed actions
        for (final yacyPeerAction action : actions) {
            action.processPeerDeparture(peer);
        }
    }
}
/**
 * Handles a ping from a peer: the seed is stored in the potential table
 * and every deployed action is notified.
 *
 * @param peer the seed of the pinging peer; null is ignored
 */
public void peerPing(yacySeed peer) {
    if (peer != null) {
        // this is called only if the peer has junior status
        seedDB.addPotential(peer);
        // notify all deployed actions
        for (final yacyPeerAction action : actions) {
            action.processPeerPing(peer);
        }
    }
}
/**
 * Remembers the user agent string for an IP; a previous entry for the
 * same IP is replaced.
 *
 * @param IP        the key under which the user agent is stored
 * @param userAgent the user agent string to remember
 */
public void setUserAgent(String IP, String userAgent) {
    this.userAgents.put(IP, userAgent);
}
/**
 * Looks up the user agent string remembered for an IP.
 *
 * @param IP the key that was used in setUserAgent
 * @return the stored user agent, or the empty string if none is stored
 */
public String getUserAgent(String IP) {
    final String agent = userAgents.get(IP);
    if (agent == null) {
        return "";
    }
    return agent;
}
}