yacy_search_server/source/de/anomic/data/wiki/abstractWikiParser.java
orbiter c08f9b36a4 refactoring of wiki parser.
This was done to prepare the wiki parser for use as a parser for Wikipedia dumps, which will be used for performance tests (to avoid crawling).

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5785 6c8d7289-2bf4-0310-a012-ef5d649a1542
2009-04-08 15:28:45 +00:00

66 lines
2.2 KiB
Java

package de.anomic.data.wiki;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
/**
 * Base class for wiki parsers.
 *
 * <p>It adapts the public {@code transform} entry points (string or byte content)
 * to the single reader-based {@link #transform(BufferedReader, int)} method that
 * concrete parsers implement. An {@link IOException} raised while parsing is not
 * propagated; it is reported in the returned text as
 * {@code "internal error: <message>"}.
 */
public abstract class abstractWikiParser implements wikiParser {

    /** Address handed to the constructor; this class only stores it and never reads it. */
    String address;

    /**
     * Creates the parser and remembers the given address.
     *
     * @param address value stored in {@link #address}
     */
    public abstractWikiParser(String address) {
        this.address = address;
    }

    /**
     * Performs the actual transformation; implemented by concrete parsers.
     *
     * @param reader source of the wiki text
     * @param length length of the original content: number of characters when called
     *               from the string overloads, number of bytes when called from the
     *               byte-array overloads (presumably a size hint; confirm against
     *               the implementing subclass)
     * @return the transformed text
     * @throws IOException if reading from {@code reader} fails
     */
    protected abstract String transform(BufferedReader reader, int length) throws IOException;

    /**
     * Transforms wiki text given as a string.
     *
     * @param content the wiki text
     * @return the transformed text, or {@code "internal error: <message>"} if the
     *         parser raised an {@link IOException}
     */
    public String transform(final String content) {
        return transformOrReport(new BufferedReader(new StringReader(content)), content.length());
    }

    /**
     * Transforms wiki text given as a string.
     *
     * @param content       the wiki text
     * @param publicAddress accepted for interface compatibility; not used by this
     *                      implementation
     * @return the transformed text, or {@code "internal error: <message>"} if the
     *         parser raised an {@link IOException}
     */
    public String transform(final String content, final String publicAddress) {
        return transformOrReport(new BufferedReader(new StringReader(content)), content.length());
    }

    /**
     * Transforms wiki text given as UTF-8 encoded bytes.
     *
     * @param content the encoded wiki text
     * @return the transformed text, or {@code "internal error: <message>"} if the
     *         parser raised an {@link IOException}
     * @throws UnsupportedEncodingException if the UTF-8 charset is not available
     */
    public String transform(final byte[] content) throws UnsupportedEncodingException {
        return transform(content, "UTF-8");
    }

    /**
     * Transforms wiki text given as bytes in the named encoding.
     *
     * @param content       the encoded wiki text
     * @param encoding      name of the charset used to decode {@code content}
     * @param publicAddress accepted for interface compatibility; not used by this
     *                      implementation
     * @return the transformed text, or {@code "internal error: <message>"} if the
     *         parser raised an {@link IOException}
     * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset
     */
    public String transform(final byte[] content, final String encoding, final String publicAddress) throws UnsupportedEncodingException {
        return transformOrReport(openReader(content, encoding), content.length);
    }

    /**
     * Transforms wiki text given as bytes in the named encoding.
     *
     * @param content  the encoded wiki text
     * @param encoding name of the charset used to decode {@code content}
     * @return the transformed text, or {@code "internal error: <message>"} if the
     *         parser raised an {@link IOException}
     * @throws UnsupportedEncodingException if {@code encoding} is not a supported charset
     */
    public String transform(final byte[] content, final String encoding) throws UnsupportedEncodingException {
        return transformOrReport(openReader(content, encoding), content.length);
    }

    /**
     * Builds a reader that decodes {@code content} with the named charset.
     *
     * <p>This deliberately runs outside of any {@code catch (IOException)}:
     * {@link UnsupportedEncodingException} is a subclass of {@link IOException}, so
     * constructing the reader inside such a handler would swallow the exception the
     * public methods declare and turn it into an "internal error" result.
     */
    private static BufferedReader openReader(final byte[] content, final String encoding) throws UnsupportedEncodingException {
        final ByteArrayInputStream bais = new ByteArrayInputStream(content);
        return new BufferedReader(new InputStreamReader(bais, encoding));
    }

    /**
     * Runs the subclass transformation and converts an {@link IOException} into the
     * textual error result shared by all public entry points.
     */
    private String transformOrReport(final BufferedReader reader, final int length) {
        try {
            return transform(reader, length);
        } catch (final IOException e) {
            return "internal error: " + e.getMessage();
        }
    }
}