diff --git a/htroot/Bookmarks.java b/htroot/Bookmarks.java
index b99874aad..7e5ceb266 100644
--- a/htroot/Bookmarks.java
+++ b/htroot/Bookmarks.java
@@ -256,7 +256,7 @@ public class Bookmarks {
while(count
"));
+ sb.append("\">").append(htmlTools.replaceXMLEntities(ps[j].getString()).replaceAll("\n", "
"));
sb.append("");
}
sb.append("
+ public static final String[] xmlentities={ + // Ampersands _have_ to be replaced first. If they were replaced later, + // other replaced characters containing ampersands would get messed up. + "\u0026","&", //ampersand + "\"",""", //quotation mark + "\u003C","<", //less than + "\u003E",">", //greater than + }; + + //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and + //patterns that will be replaced. To add new codes or patterns, just put them at the end + //of the list. Codes or patterns in this list can not be escaped with [= or"); - for (int i=0; i+ public static final String[] htmlentities={ + "\u005E","^", // Caret + + "\u0060","`", // Accent Grave ` + "\u007B","{", // { + "\u007C","|", // | + "\u007D","}", // } + "\u007E","~", // ~ + + "\u0082","", + "\u0083","", + "\u0084","", + "\u0085"," ", + "\u0086","", + "\u0087","", + "\u0088","", + "\u0089","", + "\u008A","", + "\u008B","", + "\u008C","", + "\u008D","", + "\u008E","", + + "\u0091","", + "\u0092","", + "\u0093","", + "\u0094","", + "\u0095","", + "\u0096","", + "\u0097","", + "\u0098","", + "\u0099","", + "\u009A","", + "\u009B","", + "\u009C","", + "\u009D","", + "\u009E","", + "\u009F","", + + "\u00A1","¡", //inverted (spanish) exclamation mark + "\u00A2","¢", //cent + "\u00A3","£", //pound + "\u00A4","¤", //currency + "\u00A5","¥", //yen + "\u00A6","¦", //broken vertical bar + "\u00A7","§", //section sign + "\u00A8","¨", //diaeresis (umlaut) + "\u00A9","©", //copyright sign + "\u00AA","ª", //feminine ordinal indicator + "\u00AB","«", //left-pointing double angle quotation mark + "\u00AC","¬", //not sign + "\u00AD","", //soft hyphen + "\u00AE","®", //registered sign + "\u00AF","¯", //macron + "\u00B0","°", //degree sign + "\u00B1","±", //plus-minus sign + "\u00B2","²", //superscript two + "\u00B3","³", //superscript three + "\u00B4","´", //acute accent + "\u00B5","µ", //micro sign + "\u00B6","¶", //paragraph sign + "\u00B7","·", //middle dot + "\u00B8","¸", //cedilla + "\u00B9","¹", //superscript one + "\u00BA","º", //masculine ordinal indicator + "\u00BB","»", //right-pointing double angle quotation mark + "\u00BC","¼", //fraction 1/4 + "\u00BD","½", //fraction 1/2 + "\u00BE","¾", //fraction 3/4 + "\u00BF","¿", //inverted (spanisch) questionmark + "\u00C0","À", + "\u00C1","Á", + "\u00C2","Â", + "\u00C3","Ã", + "\u00C4","Ä", + "\u00C5","Å", + "\u00C6","Æ", + "\u00C7","Ç", + "\u00C8","È", + "\u00C9","É", + "\u00CA","Ê", + "\u00CB","Ë", + "\u00CC","Ì", + "\u00CD","Í", + "\u00CE","Î", + "\u00CF","Ï", + "\u00D0","Ð", + "\u00D1","Ñ", + "\u00D2","Ò", + "\u00D3","Ó", + "\u00D4","Ô", + "\u00D5","Õ", + "\u00D6","Ö", + "\u00D7","×", + "\u00D8","Ø", + "\u00D9","Ù", + "\u00DA","Ú", + "\u00DB","Û", + "\u00DC","Ü", + "\u00DD","Ý", + "\u00DE","Þ", + "\u00DF","ß", + "\u00E0","à", + "\u00E1","á", + "\u00E2","â", + "\u00E3","ã", + "\u00E4","ä", + "\u00E5","å", + "\u00E6","æ", + "\u00E7","ç", + "\u00E8","è", + "\u00E9","é", + "\u00EA","ê", + "\u00EB","ë", + "\u00EC","ì", + "\u00ED","í", + "\u00EE","î", + "\u00EF","ï", + "\u00F0","ð", + "\u00F1","ñ", + "\u00F2","ò", + "\u00F3","ó", + "\u00F4","ô", + "\u00F5","õ", + "\u00F6","ö", + "\u00F7","÷", + "\u00F8","ø", + "\u00F9","ù", + "\u00FA","ú", + "\u00FB","û", + "\u00FC","ü", + "\u00FD","ý", + "\u00FE","þ", + "\u00FF","ÿ" + }; +} diff --git a/source/de/anomic/data/wiki/WikiParserException.java b/source/de/anomic/data/wiki/WikiParserException.java deleted file mode 100644 index ce2769111..000000000 --- a/source/de/anomic/data/wiki/WikiParserException.java +++ /dev/null @@ -1,20 +0,0 @@ -package de.anomic.data.wiki; - -public class WikiParserException extends RuntimeException { - - private static final long serialVersionUID = 1L; - - public WikiParserException() { } - - public WikiParserException(String message) { - super(message); - } - - public WikiParserException(Throwable cause) { - super(cause); - } - - public WikiParserException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/source/de/anomic/data/wiki/abstractWikiParser.java b/source/de/anomic/data/wiki/abstractWikiParser.java new file mode 100644 index 000000000..ac038c6a6 --- /dev/null +++ b/source/de/anomic/data/wiki/abstractWikiParser.java @@ -0,0 +1,50 @@ +package de.anomic.data.wiki; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.StringReader; +import java.io.UnsupportedEncodingException; + +import de.anomic.plasma.plasmaSwitchboard; + +public abstract class abstractWikiParser implements wikiParser { + + protected plasmaSwitchboard sb; + + public abstractWikiParser(plasmaSwitchboard sb) { + this.sb = sb; + } + + protected abstract String transform(BufferedReader reader, int length, plasmaSwitchboard sb) throws IOException; + + public String transform(String content) { + return transform(content, this.sb); + } + + public String transform(String content, plasmaSwitchboard sb) { + try { + return transform(new BufferedReader(new StringReader(content)), content.length(), sb); + } catch (IOException e) { + return "internal error: " + e.getMessage(); + } + } + + public String transform(byte[] content) throws UnsupportedEncodingException { + return transform(content, "UTF-8", this.sb); + } + + public String transform(byte[] content, String encoding) throws UnsupportedEncodingException { + return transform(content, encoding, this.sb); + } + + public String transform(byte[] content, String encoding, plasmaSwitchboard switchboard) throws UnsupportedEncodingException { + ByteArrayInputStream bais = new ByteArrayInputStream(content); + try { + return transform(new BufferedReader(new InputStreamReader(bais, encoding)), content.length, switchboard); + } catch (IOException e) { + return "internal error: " + e.getMessage(); + } + } +} diff --git a/source/de/anomic/data/wiki/knwikiParser.java b/source/de/anomic/data/wiki/knwikiParser.java new file mode 100644 index 000000000..7bbfd66ef --- /dev/null +++ b/source/de/anomic/data/wiki/knwikiParser.java @@ -0,0 +1,285 @@ +// wikiParser.java +// --------- +// part of YaCy +// (C) by Michael Peter Christen; mc@anomic.de +// first published on http://www.anomic.de +// Frankfurt, Germany, 2007 +// Created 22.02.2007 +// +// This file is contributed by Franz Brauße +// +// $LastChangedDate: $ +// $LastChangedRevision: $ +// $LastChangedBy: $ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +// Using this software in any meaning (reading, learning, copying, compiling, +// running) means that you agree that the Author(s) is (are) not responsible +// for cost, loss of data or any harm that may be caused directly or indirectly +// by usage of this softare or this documentation. The usage of this software +// is on your own risk. The installation and usage (starting/running) of this +// software may allow other people or application to access your computer and +// any attached devices and is highly dependent on the configuration of the +// software which must be done by the user of the software; the author(s) is +// (are) also not responsible for proper configuration and usage of the +// software, even if provoked by documentation provided together with +// the software. +// +// Any changes to this file according to the GPL as documented in the file +// gpl.txt aside this file in the shipment you received can be done to the +// lines that follows this copyright notice here, but changes must not be +// done inside the copyright notive above. A re-distribution must contain +// the intact and unchanged copyright notice. +// Contributions and changes to the program code must be marked as such. + +package de.anomic.data.wiki; + +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.regex.Matcher; + +import de.anomic.data.wiki.tokens.DefinitionListToken; +import de.anomic.data.wiki.tokens.LinkToken; +import de.anomic.data.wiki.tokens.ListToken; +import de.anomic.data.wiki.tokens.SimpleToken; +import de.anomic.data.wiki.tokens.TableToken; +import de.anomic.data.wiki.tokens.Token; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.yacy.yacyCore; + +public class knwikiParser implements wikiParser { + + public final Token[] tokens; + private final String[] BEs; + + public knwikiParser(plasmaSwitchboard sb) { + tokens = new Token[] { + new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), + new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), + new LinkToken(yacyCore.seedDB.mySeed.getPublicAddress(), "Wiki.html?page=", sb), + new ListToken('*', "ul"), + new ListToken('#', "ol"), + new ListToken(':', "blockquote", null), + new ListToken(' ', null, "tt", false), + new DefinitionListToken(), + new TableToken() + }; + ArrayList r = new ArrayList(); + for (int i=0, k, j; i"); + for (int i=0; i1) { + r.add(tokens[i].getBlockElementNames()[j].substring(0, k)); + } else { + r.add(tokens[i].getBlockElementNames()[j]); + } + } + r.add("hr"); + BEs = (String[])r.toArray(new String[r.size()]); + } + + public static void main(String[] args) { + String text = "===T itle===\n" + + "==blubb== was ==ein '''shice'''==...och.bla\n" + + "* ein \n" + + "*==test==\n" + + "** doppelt\n" + + "* ''tess*sst''\n" + + "*** xyz\n" + + "=]*** huch\n" + + "* ehehe***\n" + + "* blubb\n" + + "bliblablo\n\n\n" + + "* blubb\n" + + "{|border=-1\n" + + "|-\n" + + "||bla|| blubb\n" + + "|-\n" + + "||align center|och||huch||\n" + + "|}\n" + + "\n" + + "# bla\n" + + "# blubb\n" + + "'''''ehehehe''''', ne?!\n" + + "[http://www/index.html,ne?!] -\n" + + "[[Image:blubb|BLA]] ---- och\n" + + " blubb1\n" + + " blubb2\n" + + ":doppel-blubb[= huch =]\n" + + ";hier:da\n" + + ";dort:und so\n" + + ";;und:doppelt\n\n\n\n" + + "[[Image:blubb|BLA]]"; + // text = "[=\n=]* bla"; + String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + + "[=denk ich=] mal =]"; + long l = System.currentTimeMillis(); + t = new knwikiParser(null).parse((args.length > 0) ? args[0] : text); + System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); + System.out.println("--- --- ---"); + System.out.println(t); + } + + public String transform(String content) { + return parse(content); + } + + public String transform(String content, plasmaSwitchboard sb) { + return parse(content); + } + + public String transform(byte[] content) throws UnsupportedEncodingException { + return parse(new String(content, "UTF-8")); + } + + public String transform( + byte[] content, String encoding, + plasmaSwitchboard switchboard) throws UnsupportedEncodingException { + return parse(new String(content, encoding)); + } + + public String transform(byte[] content, String encoding) throws UnsupportedEncodingException { + return parse(new String(content, encoding)); + } + + public String parse(String text) { + Text[] tt = Text.split2Texts(text, "[=", "=]"); + for (int i=0; i", " "); + } + + private String replaceBRs(String text) { + StringBuffer sb = new StringBuffer(text.length()); + String[] tt = text.split("\n"); + boolean replace; + for (int i=0, j; i ")) { replace = false; break; } + sb.append(tt[i]); + if (i < tt.length - 1) { + if (replace) sb.append("
"); + sb.append("\n"); + } + } + return new String(sb); + } + + private static class Text { + + public static final String escapeNewLine = "@"; + + private String text; + private final boolean escaped; + private final boolean nl; + + public Text(String text, boolean escaped, boolean newLineBefore) { + this.text = text; + this.escaped = escaped; + this.nl = newLineBefore; + } + + public String setTextPlain(String text) { return this.text = text; } + public String setText(String text) { + if (this.nl) + this.text = text.substring(escapeNewLine.length()); + else + this.text = text; + return this.text; + } + + public String getTextPlain() { return this.text; } + public String getText() { + if (this.nl) + return escapeNewLine + this.text; + else + return this.text; + } + + public String toString() { return this.text; } + public boolean isEscaped() { return this.escaped; } + public boolean isNewLineBefore() { return this.nl; } + + private static Text[] split2Texts(String text, String escapeBegin, String escapeEnd) { + if (text == null) return null; + if (text.length() < 2) return new Text[] { new Text(text, false, true) }; + + int startLen = escapeBegin.length(); + int endLen = escapeEnd.length(); + ArrayList r = new ArrayList(); + boolean escaped = text.startsWith(escapeBegin); + if (escaped) r.add(new Text("", false, true)); + int i, j = 0; + while ((i = text.indexOf((escaped) ? escapeEnd : escapeBegin, j)) > -1) { + r.add(resolve2Text(text, escaped, (j > 0) ? j + ((escaped) ? startLen : endLen) : 0, i, escapeEnd)); + j = i; + escaped = !escaped; + } + r.add(resolve2Text(text, escaped, (escaped) ? j : (j > 0) ? j + endLen : 0, -1, escapeEnd)); + return (Text[])r.toArray(new Text[r.size()]); + } + + private static Text resolve2Text(String text, boolean escaped, int from, int to, String escapeEnd) { + if (to == -1) to = text.length(); + return new Text( + text.substring(from, to), + escaped, + from < escapeEnd.length() + 2 || (!escaped && text.charAt(from - escapeEnd.length() - 1) == '\n')); + } + + private static String mergeTexts(Text[] texts) { + StringBuffer sb = new StringBuffer(); + for (int n=0; n < texts.length; n++) + sb.append(texts[n].getTextPlain()); + return new String(sb); + } + } +} diff --git a/source/de/anomic/data/wiki/tokens/AbstractToken.java b/source/de/anomic/data/wiki/tokens/AbstractToken.java index dbffa1b6a..231afecb3 100644 --- a/source/de/anomic/data/wiki/tokens/AbstractToken.java +++ b/source/de/anomic/data/wiki/tokens/AbstractToken.java @@ -47,15 +47,17 @@ package de.anomic.data.wiki.tokens; +import de.anomic.data.wiki.wikiParserException; + public abstract class AbstractToken implements Token { protected String text = null; protected String markup = null; protected boolean parsed = false; - protected abstract void parse(); + protected abstract void parse() throws wikiParserException; - public String getMarkup() { + public String getMarkup() throws wikiParserException { if (this.text == null) throw new IllegalArgumentException(); if (!this.parsed) parse(); @@ -64,5 +66,5 @@ public abstract class AbstractToken implements Token { public String getText() { return this.text; } - public String toString() { return getMarkup(); } + public String toString() { try { return getMarkup(); } catch (wikiParserException e) { return null; } } } diff --git a/source/de/anomic/data/wiki/tokens/LinkToken.java b/source/de/anomic/data/wiki/tokens/LinkToken.java index 74e6aa84a..3e27b1bff 100644 --- a/source/de/anomic/data/wiki/tokens/LinkToken.java +++ b/source/de/anomic/data/wiki/tokens/LinkToken.java @@ -55,7 +55,7 @@ import java.util.regex.Pattern; import de.anomic.data.bookmarksDB; import de.anomic.data.bookmarksDB.Bookmark; import de.anomic.data.bookmarksDB.Tag; -import de.anomic.data.wiki.WikiParserException; +import de.anomic.data.wiki.wikiParserException; import de.anomic.plasma.plasmaSwitchboard; public class LinkToken extends AbstractToken { @@ -106,13 +106,13 @@ public class LinkToken extends AbstractToken { this.sb = sb; } - protected void parse() { + protected void parse() throws wikiParserException { StringBuffer sb = new StringBuffer(); if (this.patternNr < 0 || this.patternNr >= patterns.length) - throw new WikiParserException("patternNr was not set correctly: " + this.patternNr); + throw new wikiParserException("patternNr was not set correctly: " + this.patternNr); Matcher m = patterns[this.patternNr].matcher(this.text); if (!m.find()) - throw new WikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text); + throw new wikiParserException("Didn't find match for: (" + this.patternNr + ") " + this.text); switch (this.patternNr) { case IMG: diff --git a/source/de/anomic/data/wiki/tokens/SimpleToken.java b/source/de/anomic/data/wiki/tokens/SimpleToken.java index 2290b9d02..ac701c429 100644 --- a/source/de/anomic/data/wiki/tokens/SimpleToken.java +++ b/source/de/anomic/data/wiki/tokens/SimpleToken.java @@ -51,7 +51,7 @@ import java.util.ArrayList; import java.util.regex.Matcher; import java.util.regex.Pattern; -import de.anomic.data.wiki.WikiParserException; +import de.anomic.data.wiki.wikiParserException; public class SimpleToken extends AbstractToken { @@ -88,7 +88,7 @@ public class SimpleToken extends AbstractToken { "([\\" + lastChar + "]{" + i + "," + definitionList.length + "})")}; } - public String getMarkup() { + public String getMarkup() throws wikiParserException { if (this.content == null) { if (this.text == null) { throw new IllegalArgumentException(); @@ -96,14 +96,14 @@ public class SimpleToken extends AbstractToken { setText(this.text, 0); } } - if (!this.parsed) try { parse(); } catch (WikiParserException e) { return this.text; } + if (!this.parsed) parse(); return this.markup; } - protected void parse() { + protected void parse() throws wikiParserException { String[] e; if (this.grade >= this.definitionList.length || (e = this.definitionList[this.grade]) == null) - throw new WikiParserException("Token not defined for grade: " + this.grade); + throw new wikiParserException("Token not defined for grade: " + this.grade); this.markup = getMarkup(e); this.parsed = true; } diff --git a/source/de/anomic/data/wiki/tokens/Token.java b/source/de/anomic/data/wiki/tokens/Token.java index 18393db1c..0d5675e9c 100644 --- a/source/de/anomic/data/wiki/tokens/Token.java +++ b/source/de/anomic/data/wiki/tokens/Token.java @@ -49,11 +49,13 @@ package de.anomic.data.wiki.tokens; import java.util.regex.Pattern; +import de.anomic.data.wiki.wikiParserException; + public interface Token { public Pattern[] getRegex(); public boolean setText(String text, int patternNr); public String getText(); - public String getMarkup(); + public String getMarkup() throws wikiParserException; public String[] getBlockElementNames(); } diff --git a/source/de/anomic/data/wiki/wikiParser.java b/source/de/anomic/data/wiki/wikiParser.java index 3165b07bb..dc2d82038 100644 --- a/source/de/anomic/data/wiki/wikiParser.java +++ b/source/de/anomic/data/wiki/wikiParser.java @@ -1,260 +1,14 @@ -// wikiParser.java -// --------- -// part of YaCy -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2007 -// Created 22.02.2007 -// -// This file is contributed by Franz Brauße -// -// $LastChangedDate: $ -// $LastChangedRevision: $ -// $LastChangedBy: $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. - package de.anomic.data.wiki; -import java.util.ArrayList; -import java.util.regex.Matcher; +import java.io.UnsupportedEncodingException; -import de.anomic.data.wiki.tokens.DefinitionListToken; -import de.anomic.data.wiki.tokens.LinkToken; -import de.anomic.data.wiki.tokens.ListToken; -import de.anomic.data.wiki.tokens.SimpleToken; -import de.anomic.data.wiki.tokens.TableToken; -import de.anomic.data.wiki.tokens.Token; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.yacy.yacyCore; -public class wikiParser { - - public final Token[] tokens; - private final String[] BEs; +public interface wikiParser { - public wikiParser(plasmaSwitchboard sb) { - tokens = new Token[] { - new SimpleToken('=', '=', new String[][] { null, { "h2" }, { "h3" }, { "h4" } }, true), - new SimpleToken('\'', '\'', new String[][] { null, { "i" }, { "b" }, null, { "b", "i" } }, false), - new LinkToken(yacyCore.seedDB.mySeed.getPublicAddress(), "Wiki.html?page=", sb), - new ListToken('*', "ul"), - new ListToken('#', "ol"), - new ListToken(':', "blockquote", null), - new ListToken(' ', null, "tt", false), - new DefinitionListToken(), - new TableToken() - }; - ArrayList r = new ArrayList(); - for (int i=0, k, j; i1) { - r.add(tokens[i].getBlockElementNames()[j].substring(0, k)); - } else { - r.add(tokens[i].getBlockElementNames()[j]); - } - } - r.add("hr"); - BEs = (String[])r.toArray(new String[r.size()]); - } - - public static void main(String[] args) { - String text = "===T itle===\n" + - "==blubb== was ==ein '''shice'''==...och.bla\n" + - "* ein \n" + - "*==test==\n" + - "** doppelt\n" + - "* ''tess*sst''\n" + - "*** xyz\n" + - "=]*** huch\n" + - "* ehehe***\n" + - "* blubb\n" + - "bliblablo\n\n\n" + - "* blubb\n" + - "{|border=-1\n" + - "|-\n" + - "||bla|| blubb\n" + - "|-\n" + - "||align center|och||huch||\n" + - "|}\n" + - "\n" + - "# bla\n" + - "# blubb\n" + - "'''''ehehehe''''', ne?!\n" + - "[http://www/index.html,ne?!] -\n" + - "[[Image:blubb|BLA]] ---- och\n" + - " blubb1\n" + - " blubb2\n" + - ":doppel-blubb[= huch =]\n" + - ";hier:da\n" + - ";dort:und so\n" + - ";;und:doppelt\n\n\n\n" + - "[[Image:blubb|BLA]]"; - // text = "[=\n=]* bla"; - String t = "[=] ein fucking [= test =]-text[=,ne?!=] joa, [=alles=]wunderbar," + - "[=denk ich=] mal =]"; - long l = System.currentTimeMillis(); - t = new wikiParser(null).parse((args.length > 0) ? args[0] : text); - System.out.println("parsing time: " + (System.currentTimeMillis() - l) + " ms"); - System.out.println("--- --- ---"); - System.out.println(t); - } - - public String parse(String text) { - Text[] tt = Text.split2Texts(text, "[=", "=]"); - for (int i=0; i", "
- public static String[] xmlentities={ - // Ampersands _have_ to be replaced first. If they were replaced later, - // other replaced characters containing ampersands would get messed up. - "\u0026","&", //ampersand - "\"",""", //quotation mark - "\u003C","<", //less than - "\u003E",">", //greater than - }; - - //This array contains codes (see http://mindprod.com/jgloss/unicode.html for details) and - //patterns that will be replaced. To add new codes or patterns, just put them at the end - //of the list. Codes or patterns in this list can not be escaped with [= or- public static String[] htmlentities={ - "\u005E","^", // Caret - - "\u0060","`", // Accent Grave ` - "\u007B","{", // { - "\u007C","|", // | - "\u007D","}", // } - "\u007E","~", // ~ - - "\u0082","", - "\u0083","", - "\u0084","", - "\u0085"," ", - "\u0086","", - "\u0087","", - "\u0088","", - "\u0089","", - "\u008A","", - "\u008B","", - "\u008C","", - "\u008D","", - "\u008E","", - - "\u0091","", - "\u0092","", - "\u0093","", - "\u0094","", - "\u0095","", - "\u0096","", - "\u0097","", - "\u0098","", - "\u0099","", - "\u009A","", - "\u009B","", - "\u009C","", - "\u009D","", - "\u009E","", - "\u009F","", - - "\u00A1","¡", //inverted (spanish) exclamation mark - "\u00A2","¢", //cent - "\u00A3","£", //pound - "\u00A4","¤", //currency - "\u00A5","¥", //yen - "\u00A6","¦", //broken vertical bar - "\u00A7","§", //section sign - "\u00A8","¨", //diaeresis (umlaut) - "\u00A9","©", //copyright sign - "\u00AA","ª", //feminine ordinal indicator - "\u00AB","«", //left-pointing double angle quotation mark - "\u00AC","¬", //not sign - "\u00AD","", //soft hyphen - "\u00AE","®", //registered sign - "\u00AF","¯", //macron - "\u00B0","°", //degree sign - "\u00B1","±", //plus-minus sign - "\u00B2","²", //superscript two - "\u00B3","³", //superscript three - "\u00B4","´", //acute accent - "\u00B5","µ", //micro sign - "\u00B6","¶", //paragraph sign - "\u00B7","·", //middle dot - "\u00B8","¸", //cedilla - "\u00B9","¹", //superscript one - "\u00BA","º", //masculine ordinal indicator - "\u00BB","»", //right-pointing double angle quotation mark - "\u00BC","¼", //fraction 1/4 - "\u00BD","½", //fraction 1/2 - "\u00BE","¾", //fraction 3/4 - "\u00BF","¿", //inverted (spanisch) questionmark - "\u00C0","À", - "\u00C1","Á", - "\u00C2","Â", - "\u00C3","Ã", - "\u00C4","Ä", - "\u00C5","Å", - "\u00C6","Æ", - "\u00C7","Ç", - "\u00C8","È", - "\u00C9","É", - "\u00CA","Ê", - "\u00CB","Ë", - "\u00CC","Ì", - "\u00CD","Í", - "\u00CE","Î", - "\u00CF","Ï", - "\u00D0","Ð", - "\u00D1","Ñ", - "\u00D2","Ò", - "\u00D3","Ó", - "\u00D4","Ô", - "\u00D5","Õ", - "\u00D6","Ö", - "\u00D7","×", - "\u00D8","Ø", - "\u00D9","Ù", - "\u00DA","Ú", - "\u00DB","Û", - "\u00DC","Ü", - "\u00DD","Ý", - "\u00DE","Þ", - "\u00DF","ß", - "\u00E0","à", - "\u00E1","á", - "\u00E2","â", - "\u00E3","ã", - "\u00E4","ä", - "\u00E5","å", - "\u00E6","æ", - "\u00E7","ç", - "\u00E8","è", - "\u00E9","é", - "\u00EA","ê", - "\u00EB","ë", - "\u00EC","ì", - "\u00ED","í", - "\u00EE","î", - "\u00EF","ï", - "\u00F0","ð", - "\u00F1","ñ", - "\u00F2","ò", - "\u00F3","ó", - "\u00F4","ô", - "\u00F5","õ", - "\u00F6","ö", - "\u00F7","÷", - "\u00F8","ø", - "\u00F9","ù", - "\u00FA","ú", - "\u00FB","û", - "\u00FC","ü", - "\u00FD","ý", - "\u00FE","þ", - "\u00FF","ÿ" - }; /** This method processes tables in the wiki code. * @param a string that might contain parts of a table @@ -1073,12 +820,12 @@ public class wikiCode { public String transformLine(String result, plasmaSwitchboard switchboard) { //If HTML has not bee replaced yet (can happen if method gets called in recursion), replace now! if (!replacedHTML || preformattedSpan){ - result = replaceXMLEntities(result); + result = htmlTools.replaceXMLEntities(result); replacedHTML = true; } //If special characters have not bee replaced yet, replace now! if (!replacedCharacters || preformattedSpan){ - result = replaceHTMLEntities(result); + result = htmlTools.replaceHTMLEntities(result); replacedCharacters = true; } diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index 5f89ecd34..df7f57676 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -64,6 +64,7 @@ import java.util.Iterator; import java.util.Properties; import java.util.StringTokenizer; +import de.anomic.data.htmlTools; import de.anomic.data.userDB; import de.anomic.data.wikiCode; import de.anomic.kelondro.kelondroBase64Order; @@ -899,11 +900,11 @@ public final class httpd implements serverHandler { // 06.01.2007: decode HTML entities by [FB] public static String decodeHtmlEntities(String s) { // replace all entities defined in wikiCode.characters and htmlentities - for (int i=1; i