Fix for a bug in the HTML scraper that appears if the opening and closing tags are not both in the same case

See http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1173&p=7836#p7836

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4844 6c8d7289-2bf4-0310-a012-ef5d649a1542
2024-09-19 00:01:41 +02:00 · 2008-05-24 10:47:22 +00:00 · 2008-05-24 10:47:22 +00:00 · f8b015949c
commit f8b015949c
parent d8277e6af1
2 changed files with 2 additions and 1 deletion
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -72,6 +72,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
    private static final HashSet<String> linkTags0 = new HashSet<String>(9,0.99f);
    private static final HashSet<String> linkTags1 = new HashSet<String>(7,0.99f);

+    // all these tags must be given in lowercase, because the tags from the files are compared in lowercase
    static {
        linkTags0.add("img");
        linkTags0.add("base");
--- a/source/de/anomic/htmlFilter/htmlFilterWriter.java
+++ b/source/de/anomic/htmlFilter/htmlFilterWriter.java
@@ -231,7 +231,7 @@ public final class htmlFilterWriter extends Writer {
        }
        
        // it's a tag! which one?
-        if ((opening) || (!(tag.equals(filterTag)))) {
+        if ((opening) || (!(tag.equalsIgnoreCase(filterTag)))) {
            // this tag is not our concern. just add it
            filterCont.append(genTag0raw(tag, opening, content));
            return new char[0];