mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
- update Maven pom
- add ppt parser test case
This commit is contained in:
parent
3dcfc717eb
commit
b510b182d8
7
pom.xml
7
pom.xml
|
@ -595,7 +595,12 @@
|
|||
<groupId>org.eclipse.jetty</groupId>
|
||||
<artifactId>jetty-deploy</artifactId>
|
||||
<version>9.2.1.v20140609</version>
|
||||
</dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.bitlet</groupId>
|
||||
<artifactId>weupnp</artifactId>
|
||||
<version>0.1.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.woodstox</groupId>
|
||||
<artifactId>wstx-asl</artifactId>
|
||||
|
|
|
@ -13,6 +13,7 @@ import net.yacy.document.parser.docParser;
|
|||
import net.yacy.document.parser.odtParser;
|
||||
import net.yacy.document.parser.ooxmlParser;
|
||||
import net.yacy.document.parser.pdfParser;
|
||||
import net.yacy.document.parser.pptParser;
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Test;
|
||||
|
@ -149,5 +150,45 @@ public class ParserTest {
|
|||
}
|
||||
} catch (final InterruptedException ex) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Powerpoint parser test *
|
||||
*/
|
||||
@Test
|
||||
public void testpptParsers() throws FileNotFoundException, Parser.Failure, MalformedURLException, UnsupportedEncodingException, IOException, InterruptedException {
|
||||
final String[][] testFiles = new String[][]{
|
||||
// meaning: filename in test/parsertest, mimetype, title, creator, description,
|
||||
new String[]{"umlaute_linux.ppt", "application/powerpoint", "In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen", "", ""},
|
||||
new String[]{"umlaute_windows.ppt", "application/powerpoint", "In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen", "afieg", ""},
|
||||
new String[]{"umlaute_mac.ppt", "application/powerpoint", "In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen", "Bob", ""}
|
||||
};
|
||||
|
||||
for (final String[] testFile : testFiles) {
|
||||
|
||||
final String filename = "test/parsertest/" + testFile[0];
|
||||
final File file = new File(filename);
|
||||
final String mimetype = testFile[1];
|
||||
final AnchorURL url = new AnchorURL("http://localhost/" + filename);
|
||||
|
||||
AbstractParser p = new pptParser();
|
||||
final Document[] docs = p.parse(url, mimetype, null, new FileInputStream(file));
|
||||
for (final Document doc : docs) {
|
||||
final Reader content = new InputStreamReader(doc.getTextStream(), doc.getCharset());
|
||||
final StringBuilder str = new StringBuilder();
|
||||
int c;
|
||||
while ((c = content.read()) != -1) {
|
||||
str.append((char) c);
|
||||
}
|
||||
|
||||
System.out.println("Parsed " + filename + ": " + str);
|
||||
assertThat(str.toString(), containsString("In München steht ein Hofbräuhaus, dort gibt es Bier in Maßkrügen"));
|
||||
assertThat(doc.dc_title(), containsString(testFile[2]));
|
||||
assertThat(doc.dc_creator(), containsString(testFile[3]));
|
||||
if (testFile[4].length() > 0) {
|
||||
assertThat(doc.dc_description()[0], containsString(testFile[4]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user