2009-07-18 00:03:34 +02:00
package de.anomic.document ;
import static org.junit.Assert.* ;
import org.junit.Test ;
2009-07-18 17:04:34 +02:00
import static org.junit.matchers.JUnitMatchers.* ;
2009-07-18 00:03:34 +02:00
import java.io.File ;
import java.io.FileInputStream ;
import java.io.Reader ;
import java.io.InputStreamReader ;
2009-07-18 17:04:34 +02:00
import de.anomic.document.Document ;
2009-07-18 00:03:34 +02:00
import de.anomic.document.Parser ;
import de.anomic.yacy.yacyURL ;
public class ParserTest {
@Test public void testParsers ( ) throws java . io . FileNotFoundException , java . lang . InterruptedException ,
de . anomic . document . ParserException , java . net . MalformedURLException ,
java . io . UnsupportedEncodingException , java . io . IOException {
String [ ] [ ] testFiles = new String [ ] [ ] {
2009-07-18 17:04:34 +02:00
// meaning: filename in test/parsertest, mimetype, title, creator, description,
2009-08-08 17:34:41 +02:00
new String [ ] { " umlaute_windows.docx " , " application/vnd.openxmlformats-officedocument.wordprocessingml.document " , " In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen " , " " , " " } ,
2009-07-18 17:04:34 +02:00
new String [ ] { " umlaute_linux.odt " , " application/vnd.oasis.opendocument.text " , " Münchner Hofbräuhaus " , " " , " Kommentar zum Hofbräuhaus " } ,
new String [ ] { " umlaute_linux.ods " , " application/vnd.oasis.opendocument.spreadsheat " , " " , " " , " " } ,
new String [ ] { " umlaute_linux.odp " , " application/vnd.oasis.opendocument.presentation " , " " , " " , " " } ,
new String [ ] { " umlaute_linux.pdf " , " application/pdf " , " " , " " , " " } ,
new String [ ] { " umlaute_windows.doc " , " application/msword " , " " , " " , " " } ,
2009-07-18 00:03:34 +02:00
} ;
for ( int i = 0 ; i < testFiles . length ; i + + ) {
String filename = " test/parsertest/ " + testFiles [ i ] [ 0 ] ;
File file = new File ( filename ) ;
String mimetype = testFiles [ i ] [ 1 ] ;
yacyURL url = new yacyURL ( " http://localhost/ " + filename ) ;
Document doc = Parser . parseSource ( url , mimetype , null , file . length ( ) , new FileInputStream ( file ) ) ;
Reader content = new InputStreamReader ( doc . getText ( ) , doc . getCharset ( ) ) ;
StringBuilder str = new StringBuilder ( ) ;
int c ;
while ( ( c = content . read ( ) ) ! = - 1 )
str . append ( ( char ) c ) ;
2009-07-18 17:04:34 +02:00
System . out . println ( " Parsed " + filename + " : " + str ) ;
2009-07-18 00:03:34 +02:00
2009-07-18 17:04:34 +02:00
assertThat ( str . toString ( ) , containsString ( " In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen " ) ) ;
assertThat ( doc . dc_title ( ) , containsString ( testFiles [ i ] [ 2 ] ) ) ;
assertThat ( doc . dc_creator ( ) , containsString ( testFiles [ i ] [ 3 ] ) ) ;
assertThat ( doc . dc_description ( ) , containsString ( testFiles [ i ] [ 4 ] ) ) ;
2009-07-18 00:03:34 +02:00
}
}
}