mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
fixed sitemap crawl start
This commit is contained in:
parent
b743e6d79f
commit
1a09771be8
|
@@ -170,7 +170,8 @@ public class Crawler_p {
|
|||
if (t > 0) deleteageDate = new Date(t);
|
||||
}
|
||||
final boolean deleteold = (deleteage && deleteageDate != null) || (restrictedcrawl && post.getBoolean("deleteold"));
|
||||
|
||||
|
||||
final String sitemapURLStr = post.get("sitemapURL","");
|
||||
String crawlingStart0 = post.get("crawlingURL","").trim(); // the crawljob start url
|
||||
String[] rootURLs0 = crawlingStart0.indexOf('\n') > 0 || crawlingStart0.indexOf('\r') > 0 ? crawlingStart0.split("[\\r\\n]+") : crawlingStart0.split(Pattern.quote("|"));
|
||||
Set<DigestURL> rootURLs = new HashSet<DigestURL>();
|
||||
|
@@ -199,7 +200,7 @@ public class Crawler_p {
|
|||
if (p >= 8) crawlName = crawlName.substring(0, p);
|
||||
}
|
||||
if (crawlName.endsWith(",")) crawlName = crawlName.substring(0, crawlName.length() - 1);
|
||||
|
||||
if (crawlName.length() == 0 && sitemapURLStr.length() > 0) crawlName = "sitemap loader for " + sitemapURLStr;
|
||||
|
||||
// set the crawl filter
|
||||
String ipMustMatch = post.get("ipMustmatch", CrawlProfile.MATCH_ALL_STRING);
|
||||
|
@@ -442,7 +443,6 @@ public class Crawler_p {
|
|||
if (successurls.size() > 0) sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
|
||||
}
|
||||
} else if ("sitemap".equals(crawlingMode)) {
|
||||
final String sitemapURLStr = post.get("sitemapURL","");
|
||||
try {
|
||||
final DigestURL sitemapURL = new DigestURL(sitemapURLStr);
|
||||
sb.crawler.putActive(handle, profile);
|
||||
|
|
Loading…
Reference in New Issue
Block a user