mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
fixed generic image parser
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7005 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
e1015ead2c
commit
989948e1a9
|
@ -27,6 +27,7 @@
|
||||||
package net.yacy.document.parser.images;
|
package net.yacy.document.parser.images;
|
||||||
|
|
||||||
import java.awt.image.BufferedImage;
|
import java.awt.image.BufferedImage;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
@ -35,10 +36,20 @@ import java.io.IOException;
|
||||||
import java.io.InputStream;import java.net.MalformedURLException;
|
import java.io.InputStream;import java.net.MalformedURLException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import javax.imageio.ImageIO;
|
import javax.imageio.ImageIO;
|
||||||
|
|
||||||
|
import com.drew.imaging.jpeg.JpegProcessingException;
|
||||||
|
import com.drew.imaging.jpeg.JpegSegmentReader;
|
||||||
|
import com.drew.metadata.Directory;
|
||||||
|
import com.drew.metadata.Metadata;
|
||||||
|
import com.drew.metadata.MetadataException;
|
||||||
|
import com.drew.metadata.Tag;
|
||||||
|
import com.drew.metadata.exif.ExifReader;
|
||||||
|
import com.drew.metadata.iptc.IptcReader;
|
||||||
|
|
||||||
import net.yacy.cora.document.MultiProtocolURI;
|
import net.yacy.cora.document.MultiProtocolURI;
|
||||||
import net.yacy.document.AbstractParser;
|
import net.yacy.document.AbstractParser;
|
||||||
import net.yacy.document.Document;
|
import net.yacy.document.Document;
|
||||||
|
@ -103,13 +114,65 @@ public class genericImageParser extends AbstractParser implements Parser {
|
||||||
// http://www.drewnoakes.com/drewnoakes.com/code/exif/
|
// http://www.drewnoakes.com/drewnoakes.com/code/exif/
|
||||||
// javadoc is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/javadoc/
|
// javadoc is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/javadoc/
|
||||||
// a tutorial is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/sampleUsage.html
|
// a tutorial is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/sampleUsage.html
|
||||||
BufferedImage image = null;
|
byte[] b;
|
||||||
try {
|
try {
|
||||||
image = ImageIO.read(sourceStream);
|
b = FileUtils.read(sourceStream);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
Log.logException(e);
|
||||||
throw new Parser.Failure(e.getMessage(), location);
|
throw new Parser.Failure(e.getMessage(), location);
|
||||||
}
|
}
|
||||||
ii = parseJavaImage(location, image);
|
|
||||||
|
ii = parseJavaImage(location, new ByteArrayInputStream(b));
|
||||||
|
|
||||||
|
JpegSegmentReader segmentReader;
|
||||||
|
try {
|
||||||
|
segmentReader = new JpegSegmentReader(new ByteArrayInputStream(b));
|
||||||
|
|
||||||
|
byte[] exifSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APP1);
|
||||||
|
byte[] iptcSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APPD);
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
|
new ExifReader(exifSegment).extract(metadata);
|
||||||
|
new IptcReader(iptcSegment).extract(metadata);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Iterator<Directory> directories = metadata.getDirectoryIterator();
|
||||||
|
HashMap<String, String> props = new HashMap<String, String>();
|
||||||
|
while (directories.hasNext()) {
|
||||||
|
Directory directory = directories.next();
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Iterator<Tag> tags = directory.getTagIterator();
|
||||||
|
while (tags.hasNext()) {
|
||||||
|
Tag tag = tags.next();
|
||||||
|
try {
|
||||||
|
props.put(tag.getTagName(), tag.getDescription());
|
||||||
|
ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
|
||||||
|
} catch (MetadataException e) {
|
||||||
|
Log.logException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
title = props.get("Image Description");
|
||||||
|
if (title == null || title.length() == 0) title = props.get("Headline");
|
||||||
|
if (title == null || title.length() == 0) title = props.get("Object Name");
|
||||||
|
|
||||||
|
author = props.get("Artist");
|
||||||
|
if (author == null || author.length() == 0) author = props.get("Writer/Editor");
|
||||||
|
if (author == null || author.length() == 0) author = props.get("By-line");
|
||||||
|
if (author == null || author.length() == 0) author = props.get("Credit");
|
||||||
|
if (author == null || author.length() == 0) author = props.get("Make");
|
||||||
|
|
||||||
|
keywords = props.get("Keywords");
|
||||||
|
if (keywords == null || keywords.length() == 0) keywords = props.get("Category");
|
||||||
|
if (keywords == null || keywords.length() == 0) keywords = props.get("Supplemental Category(s)");
|
||||||
|
|
||||||
|
description = props.get("Caption/Abstract");
|
||||||
|
if (description == null || description.length() == 0) description = props.get("Country/Primary Location");
|
||||||
|
if (description == null || description.length() == 0) description = props.get("Province/State");
|
||||||
|
if (description == null || description.length() == 0) description = props.get("Copyright Notice");
|
||||||
|
}
|
||||||
|
} catch (JpegProcessingException e) {
|
||||||
|
Log.logException(e);
|
||||||
|
// just ignore
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
ii = parseJavaImage(location, sourceStream);
|
ii = parseJavaImage(location, sourceStream);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user