BUGFIX for the regular expression used to normalize URL paths

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1261 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
borg-0300 2005-12-28 10:32:21 +00:00
parent 106cd29981
commit 8eb4181737

View File

@@ -164,7 +164,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// (this is different from previous normal forms where a '/' must not appear in root paths; here it must appear. Makes everything easier.)
if (path.length() == 0 || path.charAt(0) != '/') path = "/" + path;
Pattern pathPattern = Pattern.compile("(/[^/\\.]+/)[.]{2}(?=/)|/\\.(?=/)|/(?=/)");
Matcher matcher = pathPattern.matcher(path);
while (matcher.find()) {
@@ -172,6 +172,10 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
matcher.reset(path);
}
while (path.startsWith("/../")) {
path = path.substring(3);
}
if (defaultPort) return url.getProtocol() + "://" + url.getHost() + path;
return url.getProtocol() + "://" + url.getHost() + ":" + url.getPort() + path;
}