From fce9e7741bbac7228de4c512462e239ddce2b4da Mon Sep 17 00:00:00 2001
From: theli
Date: Mon, 4 Sep 2006 11:56:47 +0000
Subject: [PATCH] *) next step of restructuring for new crawlers
 - renaming of http specific crawler settings

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2480 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 .../de/anomic/plasma/crawler/AbstractCrawlWorker.java  |  5 +++++
 source/de/anomic/plasma/crawler/http/CrawlWorker.java  | 11 +++--------
 source/migration.java                                  |  8 +++++++-
 yacy.init                                              |  8 +++++---
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
index 8ef2932ba..7001a51bb 100644
--- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
+++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java
@@ -99,6 +99,11 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
 
     public abstract void close();
 
+    public long getDuration() {
+        final long startDate = this.startdate;
+        return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
+    }
+
     public void run() {
         this.running = true;
 
diff --git a/source/de/anomic/plasma/crawler/http/CrawlWorker.java b/source/de/anomic/plasma/crawler/http/CrawlWorker.java
index a1aaebc9a..07a195d72 100644
--- a/source/de/anomic/plasma/crawler/http/CrawlWorker.java
+++ b/source/de/anomic/plasma/crawler/http/CrawlWorker.java
@@ -108,11 +108,6 @@ public final class CrawlWorker extends AbstractCrawlWorker {
         this.protocol = "http";
     }
 
-    public long getDuration() {
-        final long startDate = this.startdate;
-        return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
-    }
-
     public void init() {
         // refreshing timeout value
         if (this.theMsg.timeout < 0) {
@@ -122,9 +117,9 @@ public final class CrawlWorker extends AbstractCrawlWorker {
         }
 
         // some http header values
-        this.acceptEncoding = this.sb.getConfig("crawler.acceptEncoding", "gzip,deflate");
-        this.acceptLanguage = this.sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5");
-        this.acceptCharset = this.sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
+        this.acceptEncoding = this.sb.getConfig("crawler.http.acceptEncoding", "gzip,deflate");
+        this.acceptLanguage = this.sb.getConfig("crawler.http.acceptLanguage","en-us,en;q=0.5");
+        this.acceptCharset = this.sb.getConfig("crawler.http.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
 
         // getting the http proxy config
         this.remoteProxyConfig = this.sb.remoteProxyConfig;
diff --git a/source/migration.java b/source/migration.java
index faf853a17..e5e9e2cdf 100644
--- a/source/migration.java
+++ b/source/migration.java
@@ -255,6 +255,12 @@ public class migration {
 
             sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
         }
+        
+        // migration of http specific crawler settings
+        if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
+            sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
+            sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
+            sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
+        }
     }
-    
 }
diff --git a/yacy.init b/yacy.init
index a9f34757a..3c24a186f 100644
--- a/yacy.init
+++ b/yacy.init
@@ -631,11 +631,13 @@ msgForwardingTo=root@localhost
 onlineCautionDelay=30000
 
 # Some configuration values for the crawler
-crawler.acceptEncoding=gzip,deflate
-crawler.acceptLanguage=en-us,en;q=0.5
-crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
 crawler.clientTimeout=9000
 
+# http crawler specific settings
+crawler.http.acceptEncoding=gzip,deflate
+crawler.http.acceptLanguage=en-us,en;q=0.5
+crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
+
 # maximum number of crawler threads
 crawler.MaxActiveThreads = 10
 crawler.MaxIdleThreads = 7
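Note on the migration step in migration.java: the old crawler.* header settings are copied to the new crawler.http.* keys only when crawler.acceptLanguage was explicitly set. A minimal, self-contained sketch of that logic, using java.util.Properties as a hypothetical stand-in for yacy's sb.getConfig/sb.setConfig store (the class and method names below are illustrative assumptions, not part of the patch):

    import java.util.Properties;

    public class CrawlerHttpSettingsMigrationSketch {

        // Copy the old crawler.* HTTP header settings to the new crawler.http.* keys,
        // falling back to the same defaults the crawler itself uses.
        static void migrateHttpCrawlerSettings(Properties config) {
            // only migrate if the old keys were actually customized
            if (config.getProperty("crawler.acceptLanguage", "").length() > 0) {
                config.setProperty("crawler.http.acceptEncoding",
                        config.getProperty("crawler.acceptEncoding", "gzip,deflate"));
                config.setProperty("crawler.http.acceptLanguage",
                        config.getProperty("crawler.acceptLanguage", "en-us,en;q=0.5"));
                config.setProperty("crawler.http.acceptCharset",
                        config.getProperty("crawler.acceptCharset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
            }
        }

        public static void main(String[] args) {
            Properties config = new Properties();
            config.setProperty("crawler.acceptLanguage", "de-de,de;q=0.5");
            migrateHttpCrawlerSettings(config);
            // prints the customized value under the new key: de-de,de;q=0.5
            System.out.println(config.getProperty("crawler.http.acceptLanguage"));
        }
    }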