This commit is contained in:
luc 2016-01-29 08:20:10 +01:00
commit f7b854465b
10 changed files with 153 additions and 59 deletions

Binary file not shown.

View File

@ -52,5 +52,11 @@
<version>1.5</version>
<type>jar</type>
</dependency>
<dependency>
<groupId>com.adobe.xmp</groupId>
<artifactId>xmpcore</artifactId>
<version>5.1.2</version>
<type>jar</type>
</dependency>
</dependencies>
</project>

View File

@ -1,5 +1,10 @@
package pt.tumba.parser.swf;
import com.adobe.xmp.XMPConst;
import com.adobe.xmp.XMPException;
import com.adobe.xmp.XMPMeta;
import com.adobe.xmp.XMPMetaFactory;
import com.adobe.xmp.properties.XMPProperty;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
@ -34,15 +39,9 @@ public class SWF2HTML extends SWFTagTypesImpl {
}
/**
* Description of the Field
*/
protected Map fontCodes = new HashMap();
/**
* Description of the Field
*/
protected PrintWriter output;
protected String headerstr ="";
protected PrintWriter output; // body of html output (containing all text)
//private HTMLParser aux;
@ -159,6 +158,65 @@ public class SWF2HTML extends SWFTagTypesImpl {
return new TextDumper();
}
/**
 * Parses and interprets the metadata string (XMP/RDF format) and creates
 * the HTML header tags (title and meta elements) for the HTML output.
 * The tags are collected in {@code headerstr}.
 *
 * The metadata originates from the SWF file, so every value is HTML-escaped
 * before it is placed into element content or an attribute value.
 * Metadata that cannot be parsed is ignored (best effort); tags collected
 * before the failure are kept.
 *
 * @param xml metadata in XMP/RDF xml format
 * @throws IOException declared by the overridden method; not thrown by this
 *         implementation itself
 */
@Override
public void tagMetaData(String xml) throws IOException {
    try {
        final XMPMeta xmpmeta = XMPMetaFactory.parseFromString(xml);

        final String title = metaValue(xmpmeta, XMPConst.NS_DC, "title");
        if (title != null) {
            // a title starts a fresh header string (replaces earlier content)
            headerstr = "<title>" + title + "</title>";
        }
        headerstr += metaTag("author", metaValue(xmpmeta, XMPConst.NS_DC, "creator"));
        headerstr += metaTag("description", metaValue(xmpmeta, XMPConst.NS_DC, "description"));
        headerstr += metaTag("keywords", metaValue(xmpmeta, XMPConst.NS_DC, "subject"));

        // get a date: prefer modified, then created, then the generic dc:date
        String date = metaValue(xmpmeta, XMPConst.NS_XMP, "ModifyDate");
        if (date == null) {
            date = metaValue(xmpmeta, XMPConst.NS_XMP, "CreateDate");
        }
        if (date == null) {
            date = metaValue(xmpmeta, XMPConst.NS_DC, "date");
        }
        headerstr += metaTag("date", date);

        headerstr += metaTag("generator", metaValue(xmpmeta, XMPConst.NS_XMP, "CreatorTool"));
        headerstr += metaTag("publisher", metaValue(xmpmeta, XMPConst.NS_DC, "publisher"));
    } catch (XMPException ignored) {
        // Deliberately best effort: broken metadata must not abort the text
        // extraction, the document is simply emitted without these tags.
    }
}

/**
 * Reads one property and returns its value ready for embedding in HTML.
 *
 * @return the HTML-escaped value, or null if the property is missing or has
 *         no usable (non-empty) value
 */
private static String metaValue(XMPMeta meta, String namespace, String name) throws XMPException {
    final XMPProperty prop = meta.getProperty(namespace, name);
    if (prop == null) {
        return null;
    }
    // NOTE(review): properties stored as arrays/structs presumably report no
    // simple value here - confirm whether the first array item should be used.
    final Object value = prop.getValue();
    if (value == null || value.toString().isEmpty()) {
        return null; // avoid emitting the literal text "null" or an empty tag
    }
    return escapeHtml(value.toString());
}

/**
 * Builds one meta element, or an empty string if there is no value.
 *
 * @param name name attribute of the meta element
 * @param escapedValue already HTML-escaped content, may be null
 */
private static String metaTag(String name, String escapedValue) {
    if (escapedValue == null) {
        return "";
    }
    return "<meta name=\"" + name + "\" content=\"" + escapedValue + "\">";
}

/**
 * Escapes the characters that are significant in HTML text and in quoted
 * attribute values.
 */
private static String escapeHtml(String text) {
    final StringBuilder sb = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        switch (c) {
            case '&':
                sb.append("&amp;");
                break;
            case '<':
                sb.append("&lt;");
                break;
            case '>':
                sb.append("&gt;");
                break;
            case '"':
                sb.append("&quot;");
                break;
            case '\'':
                sb.append("&#39;");
                break;
            default:
                sb.append(c);
        }
    }
    return sb.toString();
}
/**
* Description of the Class
@ -167,13 +225,8 @@ public class SWF2HTML extends SWFTagTypesImpl {
*@created 15 de Setembro de 2002
*/
public class TextDumper implements SWFText {
/**
* Description of the Field
*/
protected Integer fontId;
/**
* Description of the Field
*/
protected boolean firstY = true;
@ -285,23 +338,26 @@ public class SWF2HTML extends SWFTagTypesImpl {
/**
 * Parses the SWF input, extracts its text and wraps it as an HTML document.
 * Header tags collected from the SWF metadata (see tagMetaData) are placed
 * in the head element; the extracted text forms the body.
 *
 * @param in SWF input stream; it is closed by this method
 * @return HTML document containing the text found in the SWF
 * @exception Exception if reading or parsing the SWF input fails
 */
public String convertSWFToHTML(InputStream in) throws Exception {
    // do not carry header tags of a previously converted file into this one
    headerstr = "";
    final StringWriter out1 = new StringWriter();
    output = new PrintWriter(out1);
    try {
        final TagParser parser = new TagParser(this);
        final SWFReader reader = new SWFReader(parser, in);
        reader.readFile();
        sizeCount = reader.size;
    } finally {
        // release the stream even if parsing fails
        in.close();
    }
    output.flush();
    // generate html output string; title/meta tags belong into <head>
    // (<header> is a body sectioning element and must not be used here)
    final String ret = "<html>"
            + (headerstr.isEmpty() ? "<body>" : "<head>" + headerstr + "</head><body>")
            + out1.toString()
            + "</body></html>";
    return ret;
}

View File

@ -425,4 +425,14 @@ public interface SWFTagTypes extends SWFSpriteTagTypes {
*@exception IOException Description of the Exception
*/
public void tagGeneratorFont(byte[] data) throws IOException;
/**
 * Metadata such as the title, in xml format.
 * The format of the metadata is RDF that is compliant with Adobe's
 * Extensible Metadata Platform (XMP) specification.
 *
 * @param data xml data as string
 * @throws IOException if the implementation fails while processing or
 *         passing on the metadata
 */
public void tagMetaData(String data) throws IOException;
}

View File

@ -881,4 +881,20 @@ public class SWFTagTypesImpl implements SWFTagTypes {
colors, imageData);
}
}
/**
 * SWFTagTypes METADATA.
 * Passes the metadata (xml in RDF format, compliant with Adobe's Extensible
 * Metadata Platform (XMP) specification) on to the wrapped tag handler.
 * Nothing happens when no handler is set.
 *
 * @param xml xml data as string
 * @throws IOException if the wrapped handler throws it
 */
@Override
public void tagMetaData(String xml) throws IOException {
    if (tags == null) {
        return; // no delegate to forward to
    }
    tags.tagMetaData(xml);
}
}

View File

@ -200,11 +200,14 @@ public class TagParser implements SWFTags, SWFConstants {
parseDefineBits(in);
break;
case TAG_JPEGTABLES:
//parseDefineJPEGTables(in); // TODO: content length=0 (in==null) occurs for unknown reason - find out!
if (in != null) parseDefineJPEGTables(in); // TODO: content length=0 (in==null) occurs for unknown reason - find out!
break;
case TAG_DEFINEBITSJPEG3:
parseDefineBitsJPEG3(in);
break;
case TAG_METADATA:
if (in != null) parseMetaData(in);
break;
default:
//--Unknown Tag Type
tagtypes.tag(tagType, longTag, contents);
@ -366,6 +369,15 @@ public class TagParser implements SWFTags, SWFConstants {
tagtypes.tagDefineBitsJPEG3(id, imageData, alphaData);
}
/**
 * Parses a METADATA tag (TAG_METADATA = 77): reads the tag content as one
 * string and hands it to the tag handler.
 *
 * @param in stream holding the content of the tag
 * @throws IOException if reading the string or the handler call fails
 */
protected void parseMetaData(InStream in) throws IOException {
    tagtypes.tagMetaData(in.readString());
}
/**
* Description of the Method

View File

@ -1203,6 +1203,12 @@ public class TagWriter implements SWFTagTypes, SWFConstants {
}
/**
 * Writes a METADATA tag whose content is the given string.
 *
 * @param xml metadata to write (xml as string)
 * @throws IOException presumably raised by the underlying tag/stream
 *         writes - confirm against startTag, writeString and completeTag
 */
@Override
public void tagMetaData (String xml) throws IOException {
// NOTE(review): the boolean presumably selects the long tag format - confirm against startTag
startTag(TAG_METADATA, true);
out.writeString(xml);
completeTag();
}
//-----------------------------------------------------------------------
/**

View File

@ -1,31 +0,0 @@
/**
* Channels
* Copyright 2010 by Michael Peter Christen
* First released 10.5.2010 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This file is part of YaCy Content Integration
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.document.feed;
public class Channels {
}

View File

@ -66,7 +66,7 @@ import net.yacy.kelondro.util.ISO639;
public class ContentScraper extends AbstractScraper implements Scraper {
private final static int MAX_TAGSIZE = 1024 * 1024;
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
private final char degree = '\u00B0';
private final char[] minuteCharsHTML = "&#039;".toCharArray();
@ -389,16 +389,17 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// itemprop
String itemprop = tag.opts.getProperty("itemprop");
if (itemprop != null) {
String content = tag.opts.getProperty("content");
if (content != null) {
String propval = tag.opts.getProperty("content");
if (propval == null) propval = tag.opts.getProperty("datetime"); // html5 example: <time itemprop="startDate" datetime="2016-01-26">today</time> while each prop is optional
if (propval != null) {
if ("startDate".equals(itemprop)) try {
// parse ISO 8601 date
Date startDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
Date startDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
this.startDates.add(startDate);
} catch (ParseException e) {}
if ("endDate".equals(itemprop)) try {
// parse ISO 8601 date
Date endDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
Date endDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
this.endDates.add(endDate);
} catch (ParseException e) {}
}
@ -1096,10 +1097,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
this.embeds.clear();
this.images.clear();
this.metas.clear();
this.hreflang.clear();
this.navigation.clear();
this.titles.clear();
this.articles.clear();
this.startDates.clear();
this.endDates.clear();
this.headlines = null;
this.bold.clear();
this.italic.clear();
this.underline.clear();
this.li.clear();
this.dt.clear();
this.dd.clear();
this.content.clear();
this.root = null;
}

View File

@ -1486,7 +1486,9 @@ public final class Switchboard extends serverSwitch {
/**
* Initialize and apply all settings needed to enable remote crawls
* (if remote crawl is not in use, save the resources)
* (if remote crawl is not in use, save the resources) If called with
* activate==false worker threads are closed and removed (to free resources)
*
* @param activate true=enable, false=disable
*/
public void initRemoteCrawler(final boolean activate) {
@ -1536,6 +1538,9 @@ public final class Switchboard extends serverSwitch {
}
rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
} else { // activate==false, terminate and remove threads
terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER, true);
terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, true);
}
}
@ -3433,6 +3438,10 @@ public final class Switchboard extends serverSwitch {
}
}
/**
 * Not implemented: every call throws UnsupportedOperationException.
 * NOTE(review): this looks like an IDE-generated stub - implement it or
 * remove it; confirm the intended behavior.
 *
 * @param b not used by the current body
 * @throws UnsupportedOperationException always
 */
public void initBookmarks(boolean b) {
throw new UnsupportedOperationException("Not supported yet."); // placeholder left by the IDE method template, no real implementation yet
}
public class receiptSending implements Runnable
{
private final Seed initiatorPeer;