refactoring of load_delay: this is a matter of client identification

commit bcc623a843 (parent 0d0b3a30f5)
Author: Michael Peter Christen
Date: 2013-07-12 16:24:56 +02:00
15 changed files with 27 additions and 35 deletions
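Every file below makes the same substitution: the minimum load delay, formerly the public crawler-queue constant CrawlQueues.queuedMinLoadDelay, is now obtained from ClientIdentification, where the other client-side constants such as DEFAULT_TIMEOUT already live. A minimal sketch of the call-site pattern, assembled from the hunks below (the surrounding loader arguments vary per call site):

    // before: the delay was a constant of the crawler queue
    sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER,
            CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);

    // after: the delay is an aspect of client identification, read via an accessor
    sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER,
            ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);

Routing the value through a method rather than a bare constant presumably leaves room to make the delay configurable later without touching these call sites again.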

CrawlCheck_p.java

@@ -28,7 +28,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.crawler.robots.RobotsTxtEntry;
@@ -88,19 +87,19 @@ public class CrawlCheck_p {
             robotsEntry = sb.robots.getEntry(u, sb.peers.myBotIDs());
             if (robotsEntry == null) {
                 prop.put("table_list_" + row + "_robots", "no robots");
-                prop.put("table_list_" + row + "_crawldelay", CrawlQueues.queuedMinLoadDelay + " ms");
+                prop.put("table_list_" + row + "_crawldelay", ClientIdentification.minLoadDelay() + " ms");
                 prop.put("table_list_" + row + "_sitemap", "");
             } else {
                 robotsAllowed = !robotsEntry.isDisallowed(u);
                 prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
-                prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
+                prop.put("table_list_" + row + "_crawldelay", Math.max(ClientIdentification.minLoadDelay(), robotsEntry.getCrawlDelayMillis()) + " ms");
                 prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
             }
             // try to load the url
             if (robotsAllowed) try {
                 Request request = sb.loader.request(u, true, false);
-                final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 if (response == null) {
                     prop.put("table_list_" + row + "_access", "no response");
                 } else {

Crawler_p.java

@@ -43,7 +43,6 @@ import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.CrawlSwitchboard;
 import net.yacy.crawler.data.CrawlProfile;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.data.ZURL.FailCategory;
 import net.yacy.crawler.retrieval.Request;
 import net.yacy.crawler.retrieval.SitemapImporter;
@@ -288,7 +287,7 @@ public class Crawler_p {
                 // download document
                 Document scraper;
                 try {
-                    scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                    scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     // get links and generate filter
                     for (DigestURI u: scraper.getAnchors().keySet()) {
                         newRootURLs.add(u);

DictionaryLoader_p.java

@@ -27,7 +27,6 @@ import net.yacy.cora.geo.OpenGeoDBLocation;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.LibraryProvider;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -67,7 +66,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
@@ -109,7 +108,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon1Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
@@ -151,7 +150,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon2Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
                 LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
@@ -193,7 +192,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geo1Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
                 LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
@@ -236,7 +235,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("drw0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
                 LibraryProvider.activateDeReWo();
@@ -280,7 +279,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("pnd0Load")) {
             // load from the net
             try {
-                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                 final byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file());
                 LibraryProvider.activatePND();

Load_RSS_p.java

@@ -42,7 +42,6 @@ import net.yacy.cora.util.CommonPattern;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.HarvestProcess;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.RSSLoader;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.data.WorkTables;
@@ -267,7 +266,7 @@ public class Load_RSS_p {
         RSSReader rss = null;
         if (url != null) try {
             prop.put("url", url.toNormalform(true));
-            final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             final byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (final IOException e) {

ViewFile.java

@@ -45,7 +45,6 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.crawler.data.Cache;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -169,7 +168,7 @@ public class ViewFile {
         Response response = null;
         try {
-            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             prop.put("error", "4");
             prop.put("error_errorText", "error loading resource: " + e.getMessage());

ViewImage.java

@@ -39,7 +39,6 @@ import net.yacy.cora.protocol.HeaderFramework;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.storage.ConcurrentARC;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.data.URLLicense;
 import net.yacy.document.ImageParser;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -105,7 +104,7 @@ public class ViewImage {
         if (image == null) {
             byte[] resourceb = null;
             if (url != null) try {
-                resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             } catch (final IOException e) {
                 ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
             }

getpageinfo.java

@@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.robots.RobotsTxtEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.repository.Blacklist.BlacklistType;
@@ -97,7 +96,7 @@ public class getpageinfo {
         }
         net.yacy.document.Document scraper = null;
         if (u != null) try {
-            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
             // bad things are possible, i.e. that the Server responds with "403 Bad Behavior"

getpageinfo_p.java

@@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.robots.RobotsTxtEntry;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.repository.Blacklist.BlacklistType;
@@ -97,7 +96,7 @@ public class getpageinfo_p {
         }
         net.yacy.document.Document scraper = null;
         if (u != null) try {
-            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
             // bad things are possible, i.e. that the Server responds with "403 Bad Behavior"

webstructure.java

@@ -35,7 +35,6 @@ import net.yacy.cora.order.Base64Order;
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.kelondro.data.citation.CitationReference;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.kelondro.rwi.IndexCell;
@@ -98,7 +97,7 @@ public class webstructure {
         prop.put("references", 1);
         net.yacy.document.Document scraper = null;
         if (url != null) try {
-            scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             ConcurrentLog.logException(e);
         }

ClientIdentification.java

@@ -27,6 +27,7 @@ package net.yacy.cora.protocol;
 public class ClientIdentification {
+    public static final long MIN_LOAD_DELAY = 500;
     public static final int DEFAULT_TIMEOUT = 10000;
     public static final int minimumLocalDeltaInit = 10; // the minimum time difference between access of the same local domain
     public static final int minimumGlobalDeltaInit = 500; // the minimum time difference between access of the same global domain
@@ -118,4 +119,8 @@ public class ClientIdentification {
         return location;
     }
+    public static long minLoadDelay() {
+        return MIN_LOAD_DELAY;
+    }
 }
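With the accessor in place, code outside the crawler package no longer needs an import of net.yacy.crawler.data.CrawlQueues just to read a delay value. A short hypothetical usage sketch, assuming only the classes shown in this diff; robotsDelayMillis is a placeholder standing in for a parsed robots.txt Crawl-delay:

    import net.yacy.cora.protocol.ClientIdentification;

    // honor a robots.txt crawl delay, but never go below the polite minimum
    long robotsDelayMillis = 2000; // placeholder; in YaCy this comes from RobotsTxtEntry.getCrawlDelayMillis()
    long effectiveDelay = Math.max(ClientIdentification.minLoadDelay(), robotsDelayMillis);

This mirrors the Math.max pattern used in CrawlCheck_p above.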

CrawlQueues.java

@@ -63,7 +63,6 @@ import net.yacy.search.SwitchboardConstants;
 public class CrawlQueues {
-    public static final long queuedMinLoadDelay = 500;
     private static final String ERROR_DB_FILENAME = "urlError4.db";
     private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
@@ -654,7 +653,7 @@
                 try {
                     this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
                     final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
-                    final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+                    final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
                     if (response == null) {
                         this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                         if (CrawlQueues.this.log.isFine()) {

RSSLoader.java

@@ -45,7 +45,6 @@ import net.yacy.cora.storage.ComparableARC;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.SpaceExceededException;
 import net.yacy.crawler.HarvestProcess;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.data.WorkTables;
 import net.yacy.kelondro.blob.Tables;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -71,7 +70,7 @@ public class RSSLoader extends Thread {
     public void run() {
         RSSReader rss = null;
         try {
-            final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             final byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (final MalformedURLException e) {

yacyRelease.java

@@ -53,7 +53,6 @@ import net.yacy.cora.protocol.ResponseHeader;
 import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.storage.Files;
 import net.yacy.cora.util.ConcurrentLog;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.document.Document;
 import net.yacy.document.parser.tarParser;
 import net.yacy.kelondro.data.meta.DigestURI;
@@ -239,7 +238,7 @@ public final class yacyRelease extends yacyVersion {
         try {
             final DigestURI uri = location.getLocationURL();
             Thread.currentThread().setName("allReleaseFrom - host " + uri.getHost()); // makes it more easy to see which release blocks process in thread dump
-            scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (final IOException e) {
             return null;
         }

Switchboard.java

@@ -2879,7 +2879,7 @@ public final class Switchboard extends serverSwitch {
         // get a scraper to get the title
         Document scraper;
         try {
-            scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
         } catch (IOException e) {
             return "scraper cannot load URL: " + e.getMessage();
         }
@@ -2986,7 +2986,7 @@ public final class Switchboard extends serverSwitch {
         String urlName = url.toNormalform(true);
         Thread.currentThread().setName("Switchboard.addToIndex:" + urlName);
         try {
-            final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
+            final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
             if (response == null) {
                 throw new IOException("response == null");
             }

Segment.java

@@ -55,7 +55,6 @@ import net.yacy.cora.storage.HandleSet;
 import net.yacy.cora.util.ConcurrentLog;
 import net.yacy.cora.util.LookAheadIterator;
 import net.yacy.cora.util.SpaceExceededException;
-import net.yacy.crawler.data.CrawlQueues;
 import net.yacy.crawler.retrieval.Response;
 import net.yacy.document.Condenser;
 import net.yacy.document.Document;
@@ -812,7 +811,7 @@ public class Segment {
         try {
             // parse the resource
-            final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT));
+            final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT));
             if (document == null) {
                 // delete just the url entry
                 fulltext().remove(urlhash);