mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
extract author and keywords in .doc and .ppt parser
This commit is contained in:
parent
a5707cd2eb
commit
cb2c17d236
|
@ -86,6 +86,14 @@ public class docParser extends AbstractParser implements Parser {
|
|||
if (title.length() == l) break;
|
||||
l = title.length();
|
||||
}
|
||||
// get keywords (for yacy as array)
|
||||
final String keywords = extractor.getSummaryInformation().getKeywords();
|
||||
final String[] keywlist;
|
||||
if (keywords != null && !keywords.isEmpty()) {
|
||||
keywlist = keywords.split(",");
|
||||
} else {
|
||||
keywlist = null;
|
||||
}
|
||||
|
||||
Document[] docs;
|
||||
docs = new Document[]{new Document(
|
||||
|
@ -94,9 +102,9 @@ public class docParser extends AbstractParser implements Parser {
|
|||
"UTF-8",
|
||||
this,
|
||||
null,
|
||||
null,
|
||||
keywlist,
|
||||
singleList(title),
|
||||
"", // TODO: AUTHOR
|
||||
extractor.getSummaryInformation().getAuthor(), // constructor can handle null
|
||||
extractor.getDocSummaryInformation().getCompany(), // publisher
|
||||
null,
|
||||
null,
|
||||
|
|
|
@ -78,6 +78,12 @@ public class pptParser extends AbstractParser implements Parser {
|
|||
if (title.length() == l) break;
|
||||
l = title.length();
|
||||
}
|
||||
// get keywords (for yacy as array)
|
||||
final String keywords = pptExtractor.getSummaryInformation().getKeywords();
|
||||
final String[] keywlist;
|
||||
if (keywords != null && !keywords.isEmpty()) {
|
||||
keywlist = keywords.split(",");
|
||||
} else keywlist = null;
|
||||
|
||||
/*
|
||||
* create the plasmaParserDocument for the database
|
||||
|
@ -89,9 +95,9 @@ public class pptParser extends AbstractParser implements Parser {
|
|||
"UTF-8",
|
||||
this,
|
||||
null,
|
||||
null,
|
||||
keywlist,
|
||||
singleList(title),
|
||||
"", // TODO: AUTHOR
|
||||
pptExtractor.getSummaryInformation().getAuthor(), // may be null
|
||||
pptExtractor.getDocSummaryInformation().getCompany(),
|
||||
null,
|
||||
null,
|
||||
|
|
Loading…
Reference in New Issue
Block a user