fixed bug with crawl profiles

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@12 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 2005-04-10 23:51:42 +00:00
parent 97dad7eaf4
commit 89eb9a2292
3 changed files with 21 additions and 56 deletions


@@ -204,6 +204,9 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch
// make crawl profiles database and default profiles
profiles = new plasmaCrawlProfile(new File(plasmaPath, "crawlProfiles0.db"));
//System.out.println("profiles.size=" + profiles.size());
//System.out.println("profile-config=" + getConfig("defaultProxyProfile", "").length());
//System.out.println("profile-entry=" + profiles.getEntry(getConfig("defaultProxyProfile", "")).toString());
if ((profiles.size() == 0) ||
(getConfig("defaultProxyProfile", "").length() == 0) ||
(profiles.getEntry(getConfig("defaultProxyProfile", "")) == null)) {
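
The condition above regenerates the default crawl profiles whenever the profile database is empty, the configured handle is empty, or the handle no longer resolves to a stored entry. A minimal, self-contained sketch of that recovery pattern follows; the Map-backed store, the class name, and the helper names are stand-ins for illustration, not YaCy's actual plasmaCrawlProfile API.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.UUID;

    // Sketch of the guard-and-regenerate pattern: look up the configured
    // default profile, and create a fresh one if the lookup cannot succeed.
    public class DefaultProfileGuard {
        private final Map<String, String> profiles = new HashMap<>(); // handle -> profile
        private final Map<String, String> config = new HashMap<>();   // switchboard config

        String ensureDefaultProfile(String configKey) {
            String handle = config.getOrDefault(configKey, "");
            // regenerate if the db is empty, the config entry is empty,
            // or the handle no longer resolves to a stored profile
            if (profiles.isEmpty() || handle.length() == 0 || profiles.get(handle) == null) {
                handle = UUID.randomUUID().toString();
                profiles.put(handle, "default profile for " + configKey);
                config.put(configKey, handle); // persist the new handle
            }
            return handle;
        }

        public static void main(String[] args) {
            DefaultProfileGuard g = new DefaultProfileGuard();
            String h = g.ensureDefaultProfile("defaultProxyProfile");
            System.out.println("defaultProxyProfile=" + h);
            // a second call finds the stored entry and keeps the same handle
            System.out.println("stable=" + h.equals(g.ensureDefaultProfile("defaultProxyProfile")));
        }
    }
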
@@ -278,24 +281,25 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch
long[] testresult = facilityDB.selectLong("statistik", "yyyyMMddHHm");
testresult = facilityDB.selectLong("statistik", (new serverDate()).toShortString(false).substring(0, 11));
// start yacy core
yacyCore yc = new yacyCore(this);
serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000);
// deploy threads
deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack",
new serverInstantThread(cacheManager, "job", "size"), log, 10000);
deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue",
new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), log, 20000);
deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling",
new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), log, 30000);
deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information" ,
new serverInstantThread(this, "cleanupJob", "cleanupJobSize"), log, 10000); // all 5 Minutes
deployThread("80_dequeue", "Indexing Dequeue", "thread that creates database entries from scraped web content and performes indexing" ,
new serverInstantThread(this, "deQueue", "queueSize"), log, 10000);
// start yacy core
yacyCore yc = new yacyCore(this);
serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000);
deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task",
new serverInstantThread(yc, "peerPing", null), yc.log, 6000);
deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack",
new serverInstantThread(cacheManager, "job", "size"), log, 10000);
deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling",
new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), log, 30000);
deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue",
new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), log, 20000);
deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account",
new serverInstantThread(yc, "publishSeedList", null), yc.log, 180000);
deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task",
new serverInstantThread(yc, "peerPing", null), yc.log, 4000);
indexDistribution = new distributeIndex(100 /*indexCount*/, 8000, 1 /*peerCount*/);
deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others",
new serverInstantThread(indexDistribution, "job", null), log, 120000);
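
Each deployThread call above binds a job method, referenced by name, on a target object to a periodically triggered worker thread. A rough, self-contained model of that reflection-driven pattern is sketched below; the class name, the back-off loop, and the demo job are assumptions for illustration, not YaCy's actual serverInstantThread implementation.

    import java.lang.reflect.Method;

    // Model of the instant-thread idea: a worker repeatedly invokes a job
    // method, resolved by name via reflection, and backs off when the job
    // reports that no work was done.
    public class InstantThreadSketch extends Thread {
        private final Object target;
        private final Method job;            // e.g. "localCrawlJob", resolved by name
        private final long idleSleepMillis;

        public InstantThreadSketch(Object target, String jobName, long idleSleepMillis)
                throws NoSuchMethodException {
            this.target = target;
            this.job = target.getClass().getMethod(jobName);
            this.idleSleepMillis = idleSleepMillis;
        }

        @Override public void run() {
            try {
                while (!isInterrupted()) {
                    boolean didWork = Boolean.TRUE.equals(job.invoke(target));
                    if (!didWork) Thread.sleep(idleSleepMillis); // idle: back off
                }
            } catch (InterruptedException e) {
                // interrupted while sleeping: fall through and terminate
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        // tiny demo job: performs three "crawl steps", then reports no work
        static class DemoJob {
            private int remaining = 3;
            public boolean localCrawlJob() {
                if (remaining == 0) return false;
                System.out.println("crawl step, remaining=" + --remaining);
                return true;
            }
        }

        public static void main(String[] args) throws Exception {
            InstantThreadSketch t = new InstantThreadSketch(new DemoJob(), "localCrawlJob", 100);
            t.start();
            Thread.sleep(1000);
            t.interrupt(); // stop the worker once the demo has run
        }
    }
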
@@ -680,7 +684,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwitch
}
log.logDebug("plasmaSwitchboard.processCrawling: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() +
", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() +
", permission=" + (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false"));
", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")));
boolean tryRemote =
(profile.remoteIndexing()) /* granted */ &&
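
The changed log line above guards against a not-yet-initialized yacyCore.seedDB: the debug output now reports "undefined" instead of throwing a NullPointerException during startup. The same null-safe pattern in isolation, with simplified stand-in classes rather than YaCy's actual types:

    // Stand-alone illustration of the null guard; Seed and SeedDB here are
    // simplified stand-ins, not YaCy's actual classes.
    public class PermissionLogSketch {
        static class Seed {
            boolean senior, principal;
            boolean isSenior()    { return senior; }
            boolean isPrincipal() { return principal; }
        }
        static class SeedDB { Seed mySeed = new Seed(); }

        static SeedDB seedDB = null; // may still be null while the core is starting up

        static String permission() {
            // report "undefined" instead of dereferencing a null seedDB
            return (seedDB == null) ? "undefined"
                 : ((seedDB.mySeed.isSenior() || seedDB.mySeed.isPrincipal()) ? "true" : "false");
        }

        public static void main(String[] args) {
            System.out.println("permission=" + permission()); // undefined
            seedDB = new SeedDB();
            seedDB.mySeed.senior = true;
            System.out.println("permission=" + permission()); // true
        }
    }
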


@@ -525,47 +525,3 @@ public class yacy {
}
}
/*
package de;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.Socket;
import javax.net.ssl.SSLSocketFactory;
public class ssltest {
public static final String TARGET_HTTPS_SERVER = "www.verisign.com";
public static final int TARGET_HTTPS_PORT = 443;
public static void main(String[] args) throws Exception {
Socket socket = SSLSocketFactory.getDefault().
createSocket(TARGET_HTTPS_SERVER, TARGET_HTTPS_PORT);
try {
Writer out = new OutputStreamWriter(
socket.getOutputStream(), "ISO-8859-1");
out.write("GET / HTTP/1.1\r\n");
out.write("Host: " + TARGET_HTTPS_SERVER + ":" +
TARGET_HTTPS_PORT + "\r\n");
out.write("Agent: SSL-TEST\r\n");
out.write("\r\n");
out.flush();
BufferedReader in = new BufferedReader(
new InputStreamReader(socket.getInputStream(), "ISO-8859-1"));
String line = null;
while ((line = in.readLine()) != null) {
System.out.println(line);
}
} finally {
socket.close();
}
}
}
*/


@@ -359,6 +359,11 @@ crawlingQ=false
storeHTCache=false
storeTXCache=true
# default crawl profile entries
# if these entries are empty, then a new entry will be generated
defaultProxyProfile=
defaultRemoteProfile=
# peers may initiate remote crawling tasks.
# every peer may allow or disallow being used as a crawling peer;
# you can also set a maximum crawl depth that can be requested or accepted