mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) next step of restructuring for new crawlers
- renaming of HTTP-specific crawler settings
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2480 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
e3f0136606
commit
fce9e7741b
|
@ -99,6 +99,11 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW
|
|||
|
||||
public abstract void close();
|
||||
|
||||
/**
 * Returns the time elapsed since the value stored in {@code startdate}.
 *
 * @return the difference between {@link System#currentTimeMillis()} and
 *         {@code startdate}, or {@code 0} when {@code startdate} is {@code 0}
 *         (presumably meaning the worker has not started a job yet -- TODO confirm)
 */
public long getDuration() {
|
||||
// Read the field once so the zero check and the subtraction below use the same value.
final long startDate = this.startdate;
|
||||
return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
this.running = true;
|
||||
|
||||
|
|
|
@ -108,11 +108,6 @@ public final class CrawlWorker extends AbstractCrawlWorker {
|
|||
this.protocol = "http";
|
||||
}
|
||||
|
||||
/**
 * Computes how long ago the timestamp held in {@code startdate} was, relative
 * to {@link System#currentTimeMillis()}.
 *
 * @return the current system time minus {@code startdate}; {@code 0} if
 *         {@code startdate} is {@code 0}
 */
public long getDuration() {
|
||||
// Snapshot of the field; the ternary below tests and subtracts this local copy.
final long startDate = this.startdate;
|
||||
return (startDate != 0) ? System.currentTimeMillis() - startDate : 0;
|
||||
}
|
||||
|
||||
public void init() {
|
||||
// refreshing timeout value
|
||||
if (this.theMsg.timeout < 0) {
|
||||
|
@ -122,9 +117,9 @@ public final class CrawlWorker extends AbstractCrawlWorker {
|
|||
}
|
||||
|
||||
// some http header values
|
||||
this.acceptEncoding = this.sb.getConfig("crawler.acceptEncoding", "gzip,deflate");
|
||||
this.acceptLanguage = this.sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5");
|
||||
this.acceptCharset = this.sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
|
||||
this.acceptEncoding = this.sb.getConfig("crawler.http.acceptEncoding", "gzip,deflate");
|
||||
this.acceptLanguage = this.sb.getConfig("crawler.http.acceptLanguage","en-us,en;q=0.5");
|
||||
this.acceptCharset = this.sb.getConfig("crawler.http.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
|
||||
|
||||
// getting the http proxy config
|
||||
this.remoteProxyConfig = this.sb.remoteProxyConfig;
|
||||
|
|
|
@ -255,6 +255,12 @@ public class migration {
|
|||
|
||||
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
|
||||
}
|
||||
|
||||
// migration of http specific crawler settings
|
||||
if ((value = sb.getConfig("crawler.acceptLanguage","")).length() > 0) {
|
||||
sb.setConfig("crawler.http.acceptEncoding", sb.getConfig("crawler.acceptEncoding","gzip,deflate"));
|
||||
sb.setConfig("crawler.http.acceptLanguage", sb.getConfig("crawler.acceptLanguage","en-us,en;q=0.5"));
|
||||
sb.setConfig("crawler.http.acceptCharset", sb.getConfig("crawler.acceptCharset","ISO-8859-1,utf-8;q=0.7,*;q=0.7"));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -631,11 +631,13 @@ msgForwardingTo=root@localhost
|
|||
onlineCautionDelay=30000
|
||||
|
||||
# Some configuration values for the crawler
|
||||
crawler.acceptEncoding=gzip,deflate
|
||||
crawler.acceptLanguage=en-us,en;q=0.5
|
||||
crawler.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
||||
crawler.clientTimeout=9000
|
||||
|
||||
# http crawler specific settings
|
||||
crawler.http.acceptEncoding=gzip,deflate
|
||||
crawler.http.acceptLanguage=en-us,en;q=0.5
|
||||
crawler.http.acceptCharset=ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
||||
|
||||
# maximum number of crawler threads
|
||||
crawler.MaxActiveThreads = 10
|
||||
crawler.MaxIdleThreads = 7
|
||||
|
|
Loading…
Reference in New Issue
Block a user