mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
refactoring of load_delay: this is a matter of client identification
This commit is contained in:
parent
0d0b3a30f5
commit
bcc623a843
|
@ -28,7 +28,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.retrieval.Request;
|
||||
import net.yacy.crawler.retrieval.Response;
|
||||
import net.yacy.crawler.robots.RobotsTxtEntry;
|
||||
|
@ -88,19 +87,19 @@ public class CrawlCheck_p {
|
|||
robotsEntry = sb.robots.getEntry(u, sb.peers.myBotIDs());
|
||||
if (robotsEntry == null) {
|
||||
prop.put("table_list_" + row + "_robots", "no robots");
|
||||
prop.put("table_list_" + row + "_crawldelay", CrawlQueues.queuedMinLoadDelay + " ms");
|
||||
prop.put("table_list_" + row + "_crawldelay", ClientIdentification.minLoadDelay() + " ms");
|
||||
prop.put("table_list_" + row + "_sitemap", "");
|
||||
} else {
|
||||
robotsAllowed = !robotsEntry.isDisallowed(u);
|
||||
prop.put("table_list_" + row + "_robots", "robots exist: " + (robotsAllowed ? "crawl allowed" : "url disallowed"));
|
||||
prop.put("table_list_" + row + "_crawldelay", Math.max(CrawlQueues.queuedMinLoadDelay, robotsEntry.getCrawlDelayMillis()) + " ms");
|
||||
prop.put("table_list_" + row + "_crawldelay", Math.max(ClientIdentification.minLoadDelay(), robotsEntry.getCrawlDelayMillis()) + " ms");
|
||||
prop.put("table_list_" + row + "_sitemap", robotsEntry.getSitemap() == null ? "-" : robotsEntry.getSitemap().toNormalform(true));
|
||||
}
|
||||
|
||||
// try to load the url
|
||||
if (robotsAllowed) try {
|
||||
Request request = sb.loader.request(u, true, false);
|
||||
final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
if (response == null) {
|
||||
prop.put("table_list_" + row + "_access", "no response");
|
||||
} else {
|
||||
|
|
|
@ -43,7 +43,6 @@ import net.yacy.cora.util.ConcurrentLog;
|
|||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.crawler.CrawlSwitchboard;
|
||||
import net.yacy.crawler.data.CrawlProfile;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.data.ZURL.FailCategory;
|
||||
import net.yacy.crawler.retrieval.Request;
|
||||
import net.yacy.crawler.retrieval.SitemapImporter;
|
||||
|
@ -288,7 +287,7 @@ public class Crawler_p {
|
|||
// download document
|
||||
Document scraper;
|
||||
try {
|
||||
scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = sb.loader.loadDocument(sitelistURL, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
// get links and generate filter
|
||||
for (DigestURI u: scraper.getAnchors().keySet()) {
|
||||
newRootURLs.add(u);
|
||||
|
|
|
@ -27,7 +27,6 @@ import net.yacy.cora.geo.OpenGeoDBLocation;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.retrieval.Response;
|
||||
import net.yacy.document.LibraryProvider;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
@ -67,7 +66,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geon0Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
|
||||
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file(), null, -1));
|
||||
|
@ -109,7 +108,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geon1Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEON1.file());
|
||||
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON1.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON1.file(), null, -1));
|
||||
|
@ -151,7 +150,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geon2Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON2.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEON2.file());
|
||||
LibraryProvider.geoLoc.activateLocation(LibraryProvider.Dictionary.GEON2.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON2.file(), null, 100000));
|
||||
|
@ -193,7 +192,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("geo1Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
|
||||
LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname);
|
||||
|
@ -236,7 +235,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("drw0Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file());
|
||||
LibraryProvider.activateDeReWo();
|
||||
|
@ -280,7 +279,7 @@ public class DictionaryLoader_p {
|
|||
if (post.containsKey("pnd0Load")) {
|
||||
// load from the net
|
||||
try {
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] b = response.getContent();
|
||||
FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file());
|
||||
LibraryProvider.activatePND();
|
||||
|
|
|
@ -42,7 +42,6 @@ import net.yacy.cora.util.CommonPattern;
|
|||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.crawler.HarvestProcess;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.retrieval.RSSLoader;
|
||||
import net.yacy.crawler.retrieval.Response;
|
||||
import net.yacy.data.WorkTables;
|
||||
|
@ -267,7 +266,7 @@ public class Load_RSS_p {
|
|||
RSSReader rss = null;
|
||||
if (url != null) try {
|
||||
prop.put("url", url.toNormalform(true));
|
||||
final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = sb.loader.load(sb.loader.request(url, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] resource = response == null ? null : response.getContent();
|
||||
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
|
||||
} catch (final IOException e) {
|
||||
|
|
|
@ -45,7 +45,6 @@ import net.yacy.cora.lod.vocabulary.YaCyMetadata;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.crawler.data.Cache;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.retrieval.Response;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.Document;
|
||||
|
@ -169,7 +168,7 @@ public class ViewFile {
|
|||
|
||||
Response response = null;
|
||||
try {
|
||||
response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CacheStrategy.IFEXIST : CacheStrategy.CACHEONLY, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
prop.put("error", "4");
|
||||
prop.put("error_errorText", "error loading resource: " + e.getMessage());
|
||||
|
|
|
@ -39,7 +39,6 @@ import net.yacy.cora.protocol.HeaderFramework;
|
|||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.storage.ConcurrentARC;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.data.URLLicense;
|
||||
import net.yacy.document.ImageParser;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
@ -105,7 +104,7 @@ public class ViewImage {
|
|||
if (image == null) {
|
||||
byte[] resourceb = null;
|
||||
if (url != null) try {
|
||||
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, BlacklistType.SEARCH, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage());
|
||||
}
|
||||
|
|
|
@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.robots.RobotsTxtEntry;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.repository.Blacklist.BlacklistType;
|
||||
|
@ -97,7 +96,7 @@ public class getpageinfo {
|
|||
}
|
||||
net.yacy.document.Document scraper = null;
|
||||
if (u != null) try {
|
||||
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
// bad things are possible, i.e. that the Server responds with "403 Bad Behavior"
|
||||
|
|
|
@ -37,7 +37,6 @@ import net.yacy.cora.federate.yacy.CacheStrategy;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.robots.RobotsTxtEntry;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.repository.Blacklist.BlacklistType;
|
||||
|
@ -97,7 +96,7 @@ public class getpageinfo_p {
|
|||
}
|
||||
net.yacy.document.Document scraper = null;
|
||||
if (u != null) try {
|
||||
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = sb.loader.loadDocument(u, CacheStrategy.IFEXIST, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
// bad things are possible, i.e. that the Server responds with "403 Bad Behavior"
|
||||
|
|
|
@ -35,7 +35,6 @@ import net.yacy.cora.order.Base64Order;
|
|||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.kelondro.data.citation.CitationReference;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
import net.yacy.kelondro.rwi.IndexCell;
|
||||
|
@ -98,7 +97,7 @@ public class webstructure {
|
|||
prop.put("references", 1);
|
||||
net.yacy.document.Document scraper = null;
|
||||
if (url != null) try {
|
||||
scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = sb.loader.loadDocument(url, CacheStrategy.IFEXIST, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
ConcurrentLog.logException(e);
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ package net.yacy.cora.protocol;
|
|||
|
||||
public class ClientIdentification {
|
||||
|
||||
public static final long MIN_LOAD_DELAY = 500;
|
||||
public static final int DEFAULT_TIMEOUT = 10000;
|
||||
public static final int minimumLocalDeltaInit = 10; // the minimum time difference between access of the same local domain
|
||||
public static final int minimumGlobalDeltaInit = 500; // the minimum time difference between access of the same global domain
|
||||
|
@ -118,4 +119,8 @@ public class ClientIdentification {
|
|||
|
||||
return location;
|
||||
}
|
||||
|
||||
/**
 * Returns the minimum load delay that callers pass to the loader when
 * fetching a resource.
 *
 * The value is currently the fixed constant {@link #MIN_LOAD_DELAY} (500).
 * Callers that display it append " ms", so the unit is presumably
 * milliseconds -- NOTE(review): confirm against the loader's parameter.
 *
 * @return the value of {@code MIN_LOAD_DELAY}
 */
public static long minLoadDelay() {
|
||||
return MIN_LOAD_DELAY;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,7 +63,6 @@ import net.yacy.search.SwitchboardConstants;
|
|||
|
||||
public class CrawlQueues {
|
||||
|
||||
public static final long queuedMinLoadDelay = 500;
|
||||
private static final String ERROR_DB_FILENAME = "urlError4.db";
|
||||
private static final String DELEGATED_DB_FILENAME = "urlDelegated4.db";
|
||||
|
||||
|
@ -654,7 +653,7 @@ public class CrawlQueues {
|
|||
try {
|
||||
this.request.setStatus("loading", WorkflowJob.STATUS_RUNNING);
|
||||
final CrawlProfile e = CrawlQueues.this.sb.crawler.getActive(UTF8.getBytes(this.request.profileHandle()));
|
||||
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = CrawlQueues.this.sb.loader.load(this.request, e == null ? CacheStrategy.IFEXIST : e.cacheStrategy(), BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
if (response == null) {
|
||||
this.request.setStatus("error", WorkflowJob.STATUS_FINISHED);
|
||||
if (CrawlQueues.this.log.isFine()) {
|
||||
|
|
|
@ -45,7 +45,6 @@ import net.yacy.cora.storage.ComparableARC;
|
|||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.crawler.HarvestProcess;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.data.WorkTables;
|
||||
import net.yacy.kelondro.blob.Tables;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
@ -71,7 +70,7 @@ public class RSSLoader extends Thread {
|
|||
public void run() {
|
||||
RSSReader rss = null;
|
||||
try {
|
||||
final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = this.sb.loader.load(this.sb.loader.request(this.urlf, true, false), CacheStrategy.NOCACHE, Integer.MAX_VALUE, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final byte[] resource = response == null ? null : response.getContent();
|
||||
rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
|
||||
} catch (final MalformedURLException e) {
|
||||
|
|
|
@ -53,7 +53,6 @@ import net.yacy.cora.protocol.ResponseHeader;
|
|||
import net.yacy.cora.protocol.http.HTTPClient;
|
||||
import net.yacy.cora.storage.Files;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.document.Document;
|
||||
import net.yacy.document.parser.tarParser;
|
||||
import net.yacy.kelondro.data.meta.DigestURI;
|
||||
|
@ -239,7 +238,7 @@ public final class yacyRelease extends yacyVersion {
|
|||
try {
|
||||
final DigestURI uri = location.getLocationURL();
|
||||
Thread.currentThread().setName("allReleaseFrom - host " + uri.getHost()); // makes it more easy to see which release blocks process in thread dump
|
||||
scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = Switchboard.getSwitchboard().loader.loadDocument(uri, CacheStrategy.NOCACHE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (final IOException e) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -2879,7 +2879,7 @@ public final class Switchboard extends serverSwitch {
|
|||
// get a scraper to get the title
|
||||
Document scraper;
|
||||
try {
|
||||
scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
scraper = this.loader.loadDocument(url, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
} catch (IOException e) {
|
||||
return "scraper cannot load URL: " + e.getMessage();
|
||||
}
|
||||
|
@ -2986,7 +2986,7 @@ public final class Switchboard extends serverSwitch {
|
|||
String urlName = url.toNormalform(true);
|
||||
Thread.currentThread().setName("Switchboard.addToIndex:" + urlName);
|
||||
try {
|
||||
final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT);
|
||||
final Response response = Switchboard.this.loader.load(request, CacheStrategy.IFFRESH, BlacklistType.CRAWLER, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT);
|
||||
if (response == null) {
|
||||
throw new IOException("response == null");
|
||||
}
|
||||
|
|
|
@ -55,7 +55,6 @@ import net.yacy.cora.storage.HandleSet;
|
|||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.cora.util.LookAheadIterator;
|
||||
import net.yacy.cora.util.SpaceExceededException;
|
||||
import net.yacy.crawler.data.CrawlQueues;
|
||||
import net.yacy.crawler.retrieval.Response;
|
||||
import net.yacy.document.Condenser;
|
||||
import net.yacy.document.Document;
|
||||
|
@ -812,7 +811,7 @@ public class Segment {
|
|||
|
||||
try {
|
||||
// parse the resource
|
||||
final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, CrawlQueues.queuedMinLoadDelay, ClientIdentification.DEFAULT_TIMEOUT));
|
||||
final Document document = Document.mergeDocuments(url, null, loader.loadDocuments(loader.request(url, true, false), cacheStrategy, Integer.MAX_VALUE, null, ClientIdentification.minLoadDelay(), ClientIdentification.DEFAULT_TIMEOUT));
|
||||
if (document == null) {
|
||||
// delete just the url entry
|
||||
fulltext().remove(urlhash);
|
||||
|
|
Loading…
Reference in New Issue
Block a user