fixed parser bug with lowercase force (appeared in: http://spellbound.sourceforge.net/)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@367 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
orbiter 2005-07-03 23:33:25 +00:00
parent 377e94f30e
commit 5159a090b0
9 changed files with 39 additions and 22 deletions

View File

@ -139,8 +139,7 @@ Alternatively, you can simply set a virtual server port on your NAT/Server to en
<p>This is the account that restricts access to the proxy function. You probably don't want to share the proxy to the internet, so you should set the IP-Number Access Domain to a pattern that corresponds to your local intranet. The default setting should be right in most cases. If you want, you can also set a proxy account so that every proxy user must authenticate first, but this is rather unusual.</p>
<p><table border="0" cellspacing="5">
<tr valign="top"><td>IP-Number filter:</td><td colspan ="2"><input name="
filter" type="text" size="50" maxlength="1000" value="#[proxyfilter]#"></td></tr>
<tr valign="top"><td>IP-Number filter:</td><td colspan ="2"><input name="proxyfilter" type="text" size="50" maxlength="1000" value="#[proxyfilter]#"></td></tr>
<tr valign="top"><td>Account Name:</td><td><input name="proxyuser" type="text" size="16" maxlength="16" value="#[proxyuser]#"></td>
<td valign="bottom" align="right" rowspan="3">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<input type="submit" name="proxyaccount" value="Submit"></td></tr>
<tr valign="top"><td>Password:</td><td><input name="proxypw1" type="password" size="16" maxlength="16" value=""></td></tr>

File diff suppressed because one or more lines are too long

View File

@ -40,15 +40,15 @@
package de.anomic.htmlFilter;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.Properties;
public abstract class htmlFilterAbstractTransformer implements htmlFilterTransformer {
private HashSet tags0;
private HashSet tags1;
private TreeSet tags0;
private TreeSet tags1;
public htmlFilterAbstractTransformer(HashSet tags0, HashSet tags1) {
public htmlFilterAbstractTransformer(TreeSet tags0, TreeSet tags1) {
this.tags0 = tags0;
this.tags1 = tags1;
}

View File

@ -43,9 +43,11 @@ package de.anomic.htmlFilter;
import java.net.URL;
import java.net.MalformedURLException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.Map;
import java.util.Properties;
import java.util.Locale;
import java.text.Collator;
import de.anomic.server.serverByteBuffer;
@ -54,15 +56,20 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// statics: for initialisation of the HTMLFilterAbstractScraper
private static HashSet linkTags0;
private static HashSet linkTags1;
private static TreeSet linkTags0;
private static TreeSet linkTags1;
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
// Configure the shared collator in its own static initializer, placed ahead of the
// initializer that constructs the link-tag TreeSets with it as their comparator.
static {
// SECONDARY strength: per java.text.Collator, case differences are tertiary and are
// therefore ignored, so tag names such as "IMG" and "img" compare as equal.
insensitiveCollator.setStrength(Collator.SECONDARY);
// NO_DECOMPOSITION: accented characters are not decomposed before comparison.
// NOTE(review): presumably fine because tag names are plain ASCII; confirm.
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
}
static {
linkTags0 = new HashSet();
linkTags0 = new TreeSet(insensitiveCollator);
linkTags0.add("img");
linkTags0.add("base");
linkTags1 = new HashSet();
linkTags1 = new TreeSet(insensitiveCollator);
linkTags1.add("a");
linkTags1.add("h1");
linkTags1.add("title");

View File

@ -43,23 +43,30 @@ package de.anomic.htmlFilter;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashSet;
import java.util.TreeSet;
import java.util.Properties;
import java.util.Vector;
import java.util.Locale;
import java.text.Collator;
import de.anomic.server.serverByteBuffer;
public class htmlFilterContentTransformer extends htmlFilterAbstractTransformer implements htmlFilterTransformer {
// statics: for initialisation of the HTMLFilterAbstractTransformer
private static HashSet linkTags0;
private static HashSet linkTags1;
private static TreeSet linkTags0;
private static TreeSet linkTags1;
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
// Configure the shared collator in its own static initializer, placed ahead of the
// initializer that constructs the link-tag TreeSets with it as their comparator.
static {
// SECONDARY strength: per java.text.Collator, case differences are tertiary and are
// therefore ignored, so tag names such as "IMG" and "img" compare as equal.
insensitiveCollator.setStrength(Collator.SECONDARY);
// NO_DECOMPOSITION: accented characters are not decomposed before comparison.
// NOTE(review): presumably fine because tag names are plain ASCII; confirm.
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
}
static {
linkTags0 = new HashSet();
linkTags0 = new TreeSet(insensitiveCollator);
linkTags0.add("img");
linkTags1 = new HashSet();
linkTags1 = new TreeSet(insensitiveCollator);
linkTags1.add("a");
}

View File

@ -254,14 +254,14 @@ public final class htmlFilterOutputStream extends OutputStream {
if (in[1] == '/') {
// a closing tag
tagend = tagEnd(in, 2);
tag = new String(in, 2, tagend - 2).toLowerCase();
tag = new String(in, 2, tagend - 2);
byte[] text = new byte[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, false, text, quotechar);
} else {
// an opening tag
tagend = tagEnd(in, 1);
tag = new String(in, 1, tagend - 1).toLowerCase();
tag = new String(in, 1, tagend - 1);
byte[] text = new byte[in.length - tagend - 1];
System.arraycopy(in, tagend, text, 0, in.length - tagend - 1);
return filterTag(tag, true, text, quotechar);

View File

@ -223,6 +223,8 @@ public final class httpd implements serverHandler {
int pos;
while (st.hasMoreTokens()) {
pattern = st.nextToken();
if (key.matches(pattern)) return true;
/*
pos = pattern.indexOf("*");
if (pos < 0) {
// no wild card: exact match
@ -232,6 +234,7 @@ public final class httpd implements serverHandler {
if ((key.startsWith(pattern.substring(0, pos))) &&
(key.endsWith(pattern.substring(pos + 1)))) return true;
}
*/
}
return false;
}

View File

@ -244,7 +244,7 @@ public class plasmaSnippetCache {
hash = (String) j.next();
pos = (Integer) hs.get(hash);
if (pos == null) {
remaininghashes.add(hash);
remaininghashes.add(new String(hash));
} else {
p = pos.intValue();
if (p > maxpos) maxpos = p;

View File

@ -0,0 +1 @@
ebcblue