yacy_search_server/htroot/yacy/transferURL.java
Michael Peter Christen ccc3760a47 Refactoring and redesign of data architecture to make URIMetadataRow
superfluous. The target is to make a solr document as the core of YaCy
documents which would cause that many conversions can be removed. On the
way to this target the Equivalence of URIMetadataRow and URIMetadataNode
had to be removed to expose the usage of the old URIMetadataRow data
structure.
This refactoring already removes unneccessary conversions and should
make memory usage during indexing lower.
2012-10-18 14:29:11 +02:00

178 lines
8.5 KiB
Java

// transferURL.java
// -----------------------
// part of the AnomicHTTPD caching proxy
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004, 2005
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// You must compile this file with
// javac -classpath .:../classes transferRWI.java
import java.io.IOException;
import java.text.ParseException;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.crawler.data.ResultURLs;
import net.yacy.crawler.data.ResultURLs.EventOrigin;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.EventChannel;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.server.serverCore;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
public final class transferURL {
public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) throws InterruptedException {
final long start = System.currentTimeMillis();
long freshdate = 0;
try {freshdate = GenericFormatter.SHORT_DAY_FORMATTER.parse("20061101").getTime();} catch (final ParseException e1) {}
// return variable that accumulates replacements
final Switchboard sb = (Switchboard) env;
final serverObjects prop = new serverObjects();
if ((post == null) || (env == null)) return prop;
if (!Protocol.authentifyRequest(post, env)) return prop;
// request values
final String iam = post.get("iam", ""); // seed hash of requester
final String youare = post.get("youare", ""); // seed hash of the target peer, needed for network stability
// final String key = post.get("key", ""); // transmission key
final int urlc = post.getInt("urlc", 0); // number of transported urls
final boolean granted = sb.getConfigBool("allowReceiveIndex", false);
final boolean blockBlacklist = sb.getConfigBool("indexReceiveBlockBlacklist", false);
// response values
String result = "";
String doublevalues = "0";
final Seed otherPeer = sb.peers.get(iam);
final String otherPeerName = iam + ":" + ((otherPeer == null) ? "NULL" : (otherPeer.getName() + "/" + otherPeer.getVersion()));
if ((youare == null) || (!youare.equals(sb.peers.mySeed().hash))) {
Network.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Wrong target. Wanted peer=" + youare + ", iam=" + sb.peers.mySeed().hash);
result = "wrong_target";
} else if ((!granted) || (sb.isRobinsonMode())) {
Network.log.logInfo("Rejecting URLs from peer " + otherPeerName + ". Not granted.");
result = "error_not_granted";
} else {
int received = 0;
int blocked = 0;
int doublecheck = 0;
// read the urls from the other properties and store
String urls;
URIMetadataRow lEntry;
for (int i = 0; i < urlc; i++) {
serverCore.checkInterruption();
// read new lurl-entry
urls = post.get("url" + i);
if (urls == null) {
if (Network.log.isFine()) Network.log.logFine("transferURL: got null URL-string from peer " + otherPeerName);
blocked++;
continue;
}
// parse new lurl-entry
lEntry = URIMetadataRow.importEntry(urls);
if (lEntry == null) {
if (Network.log.isWarning()) Network.log.logWarning("transferURL: received invalid URL (entry null) from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}
// check if entry is well-formed
if (lEntry.url() == null) {
if (Network.log.isWarning()) Network.log.logWarning("transferURL: received invalid URL from peer " + otherPeerName + "\n\tURL Property: " + urls);
blocked++;
continue;
}
// check whether entry is too old
if (lEntry.freshdate().getTime() <= freshdate) {
if (Network.log.isFine()) Network.log.logFine("transerURL: received too old URL from peer " + otherPeerName + ": " + lEntry.freshdate());
blocked++;
continue;
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(lEntry.url());
if (urlRejectReason != null) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked URL '" + lEntry.url() + "' (" + urlRejectReason + ") from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;
}
// doublecheck
if (sb.index.exists(lEntry.hash())) {
if (Network.log.isFine()) Network.log.logFine("transferURL: double URL '" + lEntry.url() + "' from peer " + otherPeerName);
lEntry = null;
doublecheck++;
continue;
}
// write entry to database
if (Network.log.isFine()) Network.log.logFine("Accepting URL " + i + "/" + urlc + " from peer " + otherPeerName + ": " + lEntry.url().toNormalform(true));
try {
sb.index.fulltext().putMetadata(lEntry);
ResultURLs.stack(ASCII.String(lEntry.url().hash()), lEntry.url().getHost(), iam.getBytes(), iam.getBytes(), EventOrigin.DHT_TRANSFER);
if (Network.log.isFine()) Network.log.logFine("transferURL: received URL '" + lEntry.url().toNormalform(false) + "' from peer " + otherPeerName);
received++;
} catch (final IOException e) {
Log.logException(e);
}
}
sb.peers.mySeed().incRU(received);
// return rewrite properties
Network.log.logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, blocked " + blocked + " URLs");
EventChannel.channels(EventChannel.DHTRECEIVE).addMessage(new RSSMessage("Received " + received + ", blocked " + blocked + " URLs from peer " + otherPeerName, "", otherPeer.hash));
if (doublecheck > 0) {
Network.log.logSevere("Received " + doublecheck + " double URLs from peer " + otherPeerName);
doublevalues = Integer.toString(doublecheck);
}
result = "ok";
}
prop.put("double", doublevalues);
prop.put("result", result);
return prop;
}
}