mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
added parsing of 'date', 'dc:date', 'dc.date' and 'last-modified' in
html meta fields to get a correct (or at least better) date timestamp. The HTTP Last-Modified header is mostly unreliable because most CMSs set it to the current date.
This commit is contained in:
parent
9cc8468b30
commit
35ab2cef7b
|
@ -116,7 +116,7 @@ public class searchresult {
|
|||
post.put("defType", "edismax");
|
||||
post.put(CommonParams.Q, solrQ.toString());
|
||||
post.put(CommonParams.ROWS, post.remove("num"));
|
||||
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 5000 : 100));
|
||||
post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 100000000 : 100));
|
||||
|
||||
// set ranking
|
||||
if (post.containsKey("sort")) {
|
||||
|
|
|
@ -94,6 +94,7 @@ public class Document {
|
|||
private final double lon, lat;
|
||||
private final Object parserObject; // the source object that was used to create the Document
|
||||
private final Map<String, Set<String>> generic_facets; // a map from vocabulary names to the set of tags for that vocabulary which apply for this document
|
||||
private final Date date;
|
||||
|
||||
public Document(final DigestURI location, final String mimeType, final String charset,
|
||||
final Object parserObject,
|
||||
|
@ -107,7 +108,8 @@ public class Document {
|
|||
final Map<DigestURI, Properties> anchors,
|
||||
final Map<DigestURI, String> rss,
|
||||
final Map<DigestURI, ImageEntry> images,
|
||||
final boolean indexingDenied) {
|
||||
final boolean indexingDenied,
|
||||
final Date date) {
|
||||
this.source = location;
|
||||
this.mimeType = (mimeType == null) ? "application/octet-stream" : mimeType;
|
||||
this.charset = charset;
|
||||
|
@ -143,6 +145,7 @@ public class Document {
|
|||
this.indexingDenied = indexingDenied;
|
||||
this.text = text == null ? "" : text;
|
||||
this.generic_facets = new HashMap<String, Set<String>>();
|
||||
this.date = date == null ? new Date() : date;
|
||||
}
|
||||
|
||||
public Object getParserObject() {
|
||||
|
@ -451,6 +454,10 @@ dc_rights
|
|||
return this.emaillinks;
|
||||
}
|
||||
|
||||
/**
 * Returns the date that was assigned to this document at construction.
 * The constructor replaces a null date argument with the current time,
 * and the field is final, so the returned value is not null.
 *
 * @return the content of the date field
 */
public Date getDate() {
|
||||
return this.date;
|
||||
}
|
||||
|
||||
/**
 * Returns the lon value of this document.
 * NOTE(review): presumably the geographic longitude paired with lat() - confirm.
 *
 * @return the content of the lon field
 */
public double lon() {
|
||||
return this.lon;
|
||||
}
|
||||
|
@ -783,6 +790,7 @@ dc_rights
|
|||
final Map<DigestURI, String> rss = new HashMap<DigestURI, String>();
|
||||
final Map<DigestURI, ImageEntry> images = new HashMap<DigestURI, ImageEntry>();
|
||||
double lon = 0.0d, lat = 0.0d;
|
||||
Date date = new Date();
|
||||
|
||||
for (final Document doc: docs) {
|
||||
|
||||
|
@ -821,6 +829,7 @@ dc_rights
|
|||
rss.putAll(doc.getRSS());
|
||||
ContentScraper.addAllImages(images, doc.getImages());
|
||||
if (doc.lon() != 0.0 && doc.lat() != 0.0) { lon = doc.lon(); lat = doc.lat(); }
|
||||
if (doc.date.before(date)) date = doc.date;
|
||||
}
|
||||
|
||||
// clean up parser data
|
||||
|
@ -852,7 +861,8 @@ dc_rights
|
|||
anchors,
|
||||
rss,
|
||||
images,
|
||||
false);
|
||||
false,
|
||||
date);
|
||||
}
|
||||
|
||||
public static Map<DigestURI, String> getHyperlinks(final Document[] documents) {
|
||||
|
|
|
@ -100,7 +100,9 @@ public class DCEntry extends MultiMapSolrParams {
|
|||
*/
|
||||
public Date getDate() {
|
||||
String d = this.get("docdatetime");
|
||||
if (d == null) d = this.get("date");
|
||||
if (d == null) d = this.get("dc:date");
|
||||
if (d == null) d = this.get("last-modified");
|
||||
if (d == null) return null;
|
||||
if (d.isEmpty()) return null;
|
||||
try {
|
||||
|
@ -286,7 +288,8 @@ public class DCEntry extends MultiMapSolrParams {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false);
|
||||
false,
|
||||
getDate());
|
||||
}
|
||||
|
||||
public void writeXML(OutputStreamWriter os) throws IOException {
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.io.FileOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
@ -171,7 +172,8 @@ public class audioTagParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)
|
||||
false,
|
||||
new Date())
|
||||
};
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
|
@ -193,7 +195,8 @@ public class audioTagParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false
|
||||
false,
|
||||
new Date()
|
||||
)};
|
||||
} finally {
|
||||
try {
|
||||
|
|
|
@ -30,6 +30,7 @@ import java.io.InputStream;
|
|||
import java.io.InputStreamReader;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -77,7 +78,8 @@ public class csvParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
}
|
||||
|
||||
private static String concatRow(String[] columns) {
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
package net.yacy.document.parser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.document.AbstractParser;
|
||||
import net.yacy.document.Document;
|
||||
|
@ -103,7 +104,8 @@ public class docParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
|
||||
return docs;
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
package net.yacy.document.parser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -65,7 +66,8 @@ public class genericParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
return docs;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,9 @@ import java.io.IOException;
|
|||
import java.io.Writer;
|
||||
import java.net.MalformedURLException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.text.ParseException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
|
@ -45,6 +47,7 @@ import java.util.regex.Pattern;
|
|||
|
||||
import javax.swing.event.EventListenerList;
|
||||
|
||||
import net.yacy.cora.date.ISO8601Formatter;
|
||||
import net.yacy.cora.document.MultiProtocolURI;
|
||||
import net.yacy.cora.sorting.ClusteredScoreMap;
|
||||
import net.yacy.cora.storage.SizeLimitedMap;
|
||||
|
@ -848,6 +851,28 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|||
if (s.toLowerCase().startsWith("url=")) return s.substring(4).trim();
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
|
||||
public Date getDate() {
|
||||
String content;
|
||||
|
||||
// <meta name="date" content="YYYY-MM-DD..." />
|
||||
content = this.metas.get("date");
|
||||
if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
|
||||
|
||||
// <meta name="DC.date" content="YYYY-MM-DD" />
|
||||
content = this.metas.get("dc.date");
|
||||
if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
|
||||
|
||||
// <meta name="DC:date" content="YYYY-MM-DD" />
|
||||
content = this.metas.get("dc:date");
|
||||
if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
|
||||
|
||||
// <meta http-equiv="last-modified" content="YYYY-MM-DD" />
|
||||
content = this.metas.get("last-modified");
|
||||
if (content != null) try {return ISO8601Formatter.FORMATTER.parse(content);} catch (ParseException e) {}
|
||||
|
||||
return new Date();
|
||||
}
|
||||
|
||||
// parse location
|
||||
// <meta NAME="ICBM" CONTENT="38.90551492, 1.454004505" />
|
||||
|
|
|
@ -141,7 +141,8 @@ public class htmlParser extends AbstractParser implements Parser {
|
|||
scraper.getAnchors(),
|
||||
scraper.getRSS(),
|
||||
scraper.getImages(),
|
||||
scraper.indexingDenied());
|
||||
scraper.indexingDenied(),
|
||||
scraper.getDate());
|
||||
ppd.setFavicon(scraper.getFavicon());
|
||||
|
||||
return ppd;
|
||||
|
|
|
@ -35,6 +35,7 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
@ -221,7 +222,8 @@ public class genericImageParser extends AbstractParser implements Parser {
|
|||
anchors, // anchors
|
||||
null,
|
||||
images,
|
||||
false)}; // images
|
||||
false,
|
||||
new Date())}; // images
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,6 +27,7 @@ package net.yacy.document.parser;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
@ -116,7 +117,8 @@ public class mmParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
}
|
||||
|
||||
private class FreeMindHandler extends DefaultHandler {
|
||||
|
|
|
@ -30,6 +30,7 @@ package net.yacy.document.parser;
|
|||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
@ -197,7 +198,9 @@ public class odtParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date()
|
||||
)};
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
|
|
@ -30,6 +30,7 @@ package net.yacy.document.parser;
|
|||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
@ -182,7 +183,8 @@ public class ooxmlParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.io.FileInputStream;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.exceptions.CryptographyException;
|
||||
|
@ -125,6 +126,7 @@ public class pdfParser extends AbstractParser implements Parser {
|
|||
// extracting some metadata
|
||||
PDDocumentInformation info = pdfDoc.getDocumentInformation();
|
||||
String docTitle = null, docSubject = null, docAuthor = null, docPublisher = null, docKeywordStr = null;
|
||||
Date docDate = new Date();
|
||||
if (info != null) {
|
||||
docTitle = info.getTitle();
|
||||
docSubject = info.getSubject();
|
||||
|
@ -132,10 +134,9 @@ public class pdfParser extends AbstractParser implements Parser {
|
|||
docPublisher = info.getProducer();
|
||||
if (docPublisher == null || docPublisher.isEmpty()) docPublisher = info.getCreator();
|
||||
docKeywordStr = info.getKeywords();
|
||||
try {if (info.getModificationDate() != null) docDate = info.getModificationDate().getTime();} catch (IOException e) {}
|
||||
// unused:
|
||||
// info.getTrapped());
|
||||
// info.getCreationDate());
|
||||
// info.getModificationDate();
|
||||
}
|
||||
info = null;
|
||||
|
||||
|
@ -218,7 +219,8 @@ public class pdfParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
docDate)};
|
||||
}
|
||||
|
||||
@SuppressWarnings("static-access")
|
||||
|
|
|
@ -29,6 +29,7 @@ package net.yacy.document.parser;
|
|||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -99,7 +100,8 @@ public class pptParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
|
|
@ -34,6 +34,7 @@ import java.io.FileReader;
|
|||
import java.io.FileWriter;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.document.AbstractParser;
|
||||
import net.yacy.document.Document;
|
||||
|
@ -115,7 +116,8 @@ public class psParser extends AbstractParser implements Parser {
|
|||
null, // anchors
|
||||
null, // rss
|
||||
null, // images
|
||||
false)}; // indexingdenied
|
||||
false, // indexingdenied
|
||||
new Date())};
|
||||
|
||||
return docs;
|
||||
} catch (final Exception e) {
|
||||
|
|
|
@ -27,6 +27,7 @@ package net.yacy.document.parser;
|
|||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -59,7 +60,7 @@ public class rdfParser extends AbstractParser implements Parser {
|
|||
|
||||
String all = "rdfdatasource";
|
||||
doc = new Document(url, mimeType, charset, null, null, null, singleList(""), "",
|
||||
"", null, new ArrayList<String>(0), 0, 0, all, null, null, null, false);
|
||||
"", null, new ArrayList<String>(0), 0, 0, all, null, null, null, false, new Date());
|
||||
|
||||
docs.add(doc);
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ import java.io.Reader;
|
|||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -80,7 +81,7 @@ public class RDFaParser extends AbstractParser implements Parser {
|
|||
}
|
||||
|
||||
Document doc = new Document(url, mimeType, charset, null, null, null, singleList(""), "",
|
||||
"", null, new ArrayList<String>(0), 0, 0, null, null, null, null, false);
|
||||
"", null, new ArrayList<String>(0), 0, 0, null, null, null, null, false, new Date());
|
||||
|
||||
try {
|
||||
if (allTriples.length > 0)
|
||||
|
@ -139,7 +140,7 @@ public class RDFaParser extends AbstractParser implements Parser {
|
|||
}
|
||||
|
||||
Document doc = new Document(url, mimeType, charset, null, null, null, singleList(""), "",
|
||||
"", null, new ArrayList<String>(0), 0, 0, all, null, null, null, false);
|
||||
"", null, new ArrayList<String>(0), 0, 0, all, null, null, null, false, new Date());
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
|
|
@ -102,7 +102,8 @@ public class rssParser extends AbstractParser implements Parser {
|
|||
anchors,
|
||||
null,
|
||||
new HashMap<DigestURI, ImageEntry>(),
|
||||
false);
|
||||
false,
|
||||
item.getPubDate());
|
||||
docs.add(doc);
|
||||
} catch (final MalformedURLException e) {
|
||||
continue;
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
package net.yacy.document.parser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import javax.swing.text.DefaultStyledDocument;
|
||||
import javax.swing.text.rtf.RTFEditorKit;
|
||||
|
@ -84,7 +85,8 @@ public class rtfParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -72,7 +73,8 @@ public class sevenzipParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false);
|
||||
false,
|
||||
new Date());
|
||||
Handler archive;
|
||||
AbstractParser.log.fine("opening 7zip archive...");
|
||||
try {
|
||||
|
|
|
@ -27,6 +27,7 @@ package net.yacy.document.parser;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -95,7 +96,8 @@ public class sidAudioParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
}
|
||||
throw new Parser.Failure("Unable to parse SID file, file does seems to be incomplete (len = " + available + ").", location);
|
||||
} catch (final IOException ex) {
|
||||
|
|
|
@ -96,7 +96,8 @@ public class sitemapParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
new HashMap<DigestURI, ImageEntry>(),
|
||||
false);
|
||||
false,
|
||||
new Date());
|
||||
docs.add(doc);
|
||||
} catch (final MalformedURLException e) {
|
||||
continue;
|
||||
|
|
|
@ -30,6 +30,7 @@ package net.yacy.document.parser;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -125,7 +126,8 @@ public class swfParser extends AbstractParser implements Parser {
|
|||
anchors, // a map of extracted anchors
|
||||
null,
|
||||
null,
|
||||
false)}; // a treeset of image URLs
|
||||
false,
|
||||
new Date())}; // a treeset of image URLs
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -110,7 +111,8 @@ public class torrentParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.io.InputStream;
|
|||
import java.io.InputStreamReader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
|
@ -227,7 +228,8 @@ public class vcfParser extends AbstractParser implements Parser {
|
|||
anchors, // a map of extracted anchors
|
||||
null,
|
||||
null, // a treeset of image URLs
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
if (e instanceof Parser.Failure) throw (Parser.Failure) e;
|
||||
|
|
|
@ -29,6 +29,7 @@ package net.yacy.document.parser;
|
|||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
|
@ -115,7 +116,8 @@ public class vsdParser extends AbstractParser implements Parser {
|
|||
null, // a map of extracted anchors
|
||||
null,
|
||||
null, // a treeset of image URLs
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
package net.yacy.document.parser;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Date;
|
||||
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.document.AbstractParser;
|
||||
|
@ -129,7 +130,8 @@ public class xlsParser extends AbstractParser implements Parser {
|
|||
null,
|
||||
null,
|
||||
null,
|
||||
false)};
|
||||
false,
|
||||
new Date())};
|
||||
} catch (final Exception e) {
|
||||
if (e instanceof InterruptedException) throw (InterruptedException) e;
|
||||
|
||||
|
|
|
@ -441,7 +441,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
|
|||
add(doc, CollectionSchema.author, author);
|
||||
}
|
||||
if (allAttr || contains(CollectionSchema.content_type)) add(doc, CollectionSchema.content_type, new String[]{document.dc_format()});
|
||||
if (allAttr || contains(CollectionSchema.last_modified)) add(doc, CollectionSchema.last_modified, responseHeader == null ? new Date() : responseHeader.lastModified());
|
||||
if (allAttr || contains(CollectionSchema.last_modified)) {
|
||||
Date lastModified = responseHeader == null ? new Date() : responseHeader.lastModified();
|
||||
if (document.getDate().before(lastModified)) lastModified = document.getDate();
|
||||
add(doc, CollectionSchema.last_modified, lastModified);
|
||||
}
|
||||
if (allAttr || contains(CollectionSchema.keywords)) add(doc, CollectionSchema.keywords, document.dc_subject(' '));
|
||||
if (allAttr || contains(CollectionSchema.synonyms_sxt)) {
|
||||
List<String> synonyms = condenser.synonyms();
|
||||
|
|
Loading…
Reference in New Issue
Block a user