*) next step of restructuring for new crawlers

- renaming of HTTP-specific crawler settings (crawler.* keys moved to crawler.http.*)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2480 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
theli 2006-09-04 11:56:47 +00:00
parent e3f0136606
commit fce9e7741b
4 changed files with 20 additions and 12 deletions

View File

@ -99,6 +99,11 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
public abstract void close();
/**
 * Returns the time elapsed since the value stored in {@code startdate}.
 *
 * @return the difference between the current system time and {@code startdate}
 *         in milliseconds, or 0 if {@code startdate} is 0
 */
public long getDuration() {
    // copy the field to a local so the check and the subtraction see the same value
    final long begin = this.startdate;
    if (begin == 0) {
        return 0;
    }
    return System.currentTimeMillis() - begin;
}
public void run() {
this.running = true;

View File

@ -108,11 +108,6 @@ public final class CrawlWorker extends AbstractCrawlWorker {
this.protocol = "http";
}
/**
 * Returns the time elapsed since the value stored in {@code startdate}.
 *
 * @return the difference between the current system time and {@code startdate}
 *         in milliseconds, or 0 if {@code startdate} is 0
 */
public long getDuration() {
    // copy the field to a local so the check and the subtraction see the same value
    final long begin = this.startdate;
    if (begin == 0) {
        return 0;
    }
    return System.currentTimeMillis() - begin;
}
public void init() {
// refreshing timeout value
if (this.theMsg.timeout < 0) {
@ -122,9 +117,9 @@ public final class CrawlWorker extends AbstractCrawlWorker {
}
// some http header values
this.acceptEncoding = this.sb.getConfig("crawler.acceptEncoding", "gzip,deflate");
this.acceptLanguage = this.sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5");
this.acceptCharset = this.sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
this.acceptEncoding = this.sb.getConfig("crawler.http.acceptEncoding", "gzip,deflate");
this.acceptLanguage = this.sb.getConfig("crawler.http.acceptLanguage","en-us,en;q=0.5");
this.acceptCharset = this.sb.getConfig("crawler.http.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
// getting the http proxy config
this.remoteProxyConfig = this.sb.remoteProxyConfig;

View File

@ -255,6 +255,12 @@ public class migration {
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
}
// Migration of the HTTP-specific crawler settings: copies the values stored under the
// old "crawler.*" keys to the new "crawler.http.*" keys.
// A non-empty "crawler.acceptLanguage" value is the only trigger that is checked; the
// other two old keys are read with their previous default as fallback.
// NOTE(review): the old keys are not cleared in this block, so the condition presumably
// stays true on later runs and the new keys would be overwritten again - confirm whether
// that is intended.
// NOTE(review): 'value' is assigned in the condition but not read inside this block.
if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
}
}
}

View File

@ -631,11 +631,13 @@ msgForwardingTo=root@localhost
onlineCautionDelay=30000
# Some configuration values for the crawler
crawler.acceptEncoding=gzip,deflate
crawler.acceptLanguage=en-us,en;q=0.5
crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
crawler.clientTimeout=9000
# http crawler specific settings
crawler.http.acceptEncoding=gzip,deflate
crawler.http.acceptLanguage=en-us,en;q=0.5
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
# maximum number of crawler threads
crawler.MaxActiveThreads = 10
crawler.MaxIdleThreads = 7