mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Fix for a bug in the HTML scraper that appears when the opening and closing tags are not written in the same letter case
See http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1173&p=7836#p7836
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4844 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
d8277e6af1
commit
f8b015949c
|
@ -72,6 +72,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
|
|||
private static final HashSet<String> linkTags0 = new HashSet<String>(9,0.99f);
|
||||
private static final HashSet<String> linkTags1 = new HashSet<String>(7,0.99f);
|
||||
|
||||
// all these tags must be given in lowercase, because the tags from the files are compared in lowercase
|
||||
static {
|
||||
linkTags0.add("img");
|
||||
linkTags0.add("base");
|
||||
|
|
|
@ -231,7 +231,7 @@ public final class htmlFilterWriter extends Writer {
|
|||
}
|
||||
|
||||
// it's a tag! which one?
|
||||
if ((opening) || (!(tag.equals(filterTag)))) {
|
||||
if ((opening) || (!(tag.equalsIgnoreCase(filterTag)))) {
|
||||
// this tag is not our concern. just add it
|
||||
filterCont.append(genTag0raw(tag, opening, content));
|
||||
return new char[0];
|
||||
|
|
Loading…
Reference in New Issue
Block a user