mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
CrawlCheck_p.html: also display info about disallowed URLs.
This commit is contained in:
parent
8b341e9818
commit
54cfcc3f56
|
@ -393,13 +393,18 @@ public class RobotsTxt {
|
||||||
// try to load the robots
|
// try to load the robots
|
||||||
RobotsTxtEntry robotsEntry = getEntry(u, userAgent);
|
RobotsTxtEntry robotsEntry = getEntry(u, userAgent);
|
||||||
boolean robotsAllowed = robotsEntry == null ? true : !robotsEntry.isDisallowed(u);
|
boolean robotsAllowed = robotsEntry == null ? true : !robotsEntry.isDisallowed(u);
|
||||||
if (robotsAllowed) try {
|
if (robotsAllowed) {
|
||||||
|
try {
|
||||||
Request request = loader.request(u, true, false);
|
Request request = loader.request(u, true, false);
|
||||||
Response response = loader.load(request, CacheStrategy.NOCACHE, BlacklistType.CRAWLER, userAgent);
|
Response response = loader.load(request, CacheStrategy.NOCACHE,
|
||||||
|
BlacklistType.CRAWLER, userAgent);
|
||||||
out.put(new CheckEntry(u, robotsEntry, response, null));
|
out.put(new CheckEntry(u, robotsEntry, response, null));
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
out.put(new CheckEntry(u, robotsEntry, null, "error response: " + e.getMessage()));
|
out.put(new CheckEntry(u, robotsEntry, null, "error response: " + e.getMessage()));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
out.put(new CheckEntry(u, robotsEntry, null, null));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (InterruptedException e) {}
|
} catch (InterruptedException e) {}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user