From b71a60c04b022868e628cbfba02abc5ddfccd3c5 Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 27 Jun 2016 03:12:39 +0200 Subject: [PATCH] fix NPE in CrawlMonitorRemoteStart servlet due to missing startURL + add a startURL attribute while generating news record for above (in Crawler_p) --- htroot/CrawlMonitorRemoteStart.java | 8 ++++---- htroot/Crawler_p.java | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/htroot/CrawlMonitorRemoteStart.java b/htroot/CrawlMonitorRemoteStart.java index 8accaf87f..906de0ea5 100644 --- a/htroot/CrawlMonitorRemoteStart.java +++ b/htroot/CrawlMonitorRemoteStart.java @@ -61,8 +61,8 @@ public class CrawlMonitorRemoteStart { prop.put("otherCrawlStartInProgress_" + showedCrawl + "_dark", dark ? "1" : "0"); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_cre", record.created().toString()); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_peername", peername); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_startURL", record.attributes().get("startURL")); + prop.put("otherCrawlStartInProgress_" + showedCrawl + "_intention", record.attributes().get("intention")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartInProgress_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0"); showedCrawl++; @@ -88,8 +88,8 @@ public class CrawlMonitorRemoteStart { prop.put("otherCrawlStartFinished_" + showedCrawl + "_dark", dark ? "1" : "0"); prop.put("otherCrawlStartFinished_" + showedCrawl + "_cre", record.created().toString()); prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_peername", peername); - prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL").toString()); - prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention").toString()); + prop.putHTML("otherCrawlStartFinished_" + showedCrawl + "_startURL", record.attributes().get("startURL")); + prop.put("otherCrawlStartFinished_" + showedCrawl + "_intention", record.attributes().get("intention")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_generalDepth", record.attributes().get("generalDepth")); prop.put("otherCrawlStartFinished_" + showedCrawl + "_crawlingQ", ("true".equals(record.attributes().get("crawlingQ"))) ? "1" : "0"); showedCrawl++; diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 4f440a0ac..c964596e8 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -594,6 +594,9 @@ public class Crawler_p { m.remove("generalFilter"); m.remove("specificFilter"); m.put("intention", post.get("intention", "").replace(',', '/')); + if (successurls.size() > 0) { // just include at least one of the startURL's in case of multiple for the news service + m.put("startURL", successurls.iterator().next().toNormalform(true)); + } sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), NewsPool.CATEGORY_CRAWL_START, m); } } else {