mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
fixed must-match filter for smb crawling
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7222 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
099def2a04
commit
c3bf17a3a1
|
@ -144,7 +144,13 @@ public class Crawler_p {
|
||||||
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted
|
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted
|
||||||
// special cases:
|
// special cases:
|
||||||
if (crawlingStartURL!= null && fullDomain) {
|
if (crawlingStartURL!= null && fullDomain) {
|
||||||
newcrawlingMustMatch = crawlingStartURL.isFile() ? "file://" + crawlingStartURL.getPath() + ".*" : crawlingStartURL.isSMB() ? "smb://" + crawlingStartURL.getPath() + ".*" : ".*" + crawlingStartURL.getHost() + ".*";
|
if (crawlingStartURL.isFile()) {
|
||||||
|
newcrawlingMustMatch = "file://" + crawlingStartURL.getPath() + ".*";
|
||||||
|
} else if (crawlingStartURL.isSMB()) {
|
||||||
|
newcrawlingMustMatch = "smb://.*" + crawlingStartURL.getHost() + ".*" + crawlingStartURL.getPath() + ".*";
|
||||||
|
} else {
|
||||||
|
newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (crawlingStart!= null && subPath && (pos = crawlingStart.lastIndexOf('/')) > 0) {
|
if (crawlingStart!= null && subPath && (pos = crawlingStart.lastIndexOf('/')) > 0) {
|
||||||
newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
|
newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
|
||||||
|
|
Loading…
Reference in New Issue
Block a user