mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
Merge branch 'master' of https://github.com/yacy/yacy_search_server
This commit is contained in:
commit
f7b854465b
Binary file not shown.
|
@ -52,5 +52,11 @@
|
|||
<version>1.5</version>
|
||||
<type>jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.adobe.xmp</groupId>
|
||||
<artifactId>xmpcore</artifactId>
|
||||
<version>5.1.2</version>
|
||||
<type>jar</type>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -1,5 +1,10 @@
|
|||
package pt.tumba.parser.swf;
|
||||
|
||||
import com.adobe.xmp.XMPConst;
|
||||
import com.adobe.xmp.XMPException;
|
||||
import com.adobe.xmp.XMPMeta;
|
||||
import com.adobe.xmp.XMPMetaFactory;
|
||||
import com.adobe.xmp.properties.XMPProperty;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
@ -34,15 +39,9 @@ public class SWF2HTML extends SWFTagTypesImpl {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
protected Map fontCodes = new HashMap();
|
||||
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
protected PrintWriter output;
|
||||
protected String headerstr ="";
|
||||
protected PrintWriter output; // body of html output (containing all text)
|
||||
|
||||
//private HTMLParser aux;
|
||||
|
||||
|
@ -159,6 +158,65 @@ public class SWF2HTML extends SWFTagTypesImpl {
|
|||
return new TextDumper();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse and interprete Metadata string (xmp rdf format) and create a
|
||||
* html header tags for the html output
|
||||
*
|
||||
* @param xml Metadata
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void tagMetaData(String xml) throws IOException {
|
||||
|
||||
try {
|
||||
XMPMeta xmpmeta = XMPMetaFactory.parseFromString(xml);
|
||||
XMPProperty xp = xmpmeta.getProperty(XMPConst.NS_DC, "title");
|
||||
if (xp != null) {
|
||||
headerstr = "<title>" + xp.getValue() + "</title>";
|
||||
}
|
||||
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_DC, "creator");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"author\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_DC, "description");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"description\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_DC, "subject");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"keywords\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
|
||||
// get a date (modified , created)
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_XMP, "ModifyDate");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"date\" content=\"" + xp.getValue() + "\">";
|
||||
} else {
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_XMP, "CreateDate");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"date\" content=\"" + xp.getValue() + "\">";
|
||||
} else {
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_DC, "date");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"date\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_XMP, "CreatorTool");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"generator\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
|
||||
xp = xmpmeta.getProperty(XMPConst.NS_DC, "publisher");
|
||||
if (xp != null) {
|
||||
headerstr += "<meta name=\"publisher\" content=\"" + xp.getValue() + "\">";
|
||||
}
|
||||
|
||||
} catch (XMPException ex) { }
|
||||
}
|
||||
|
||||
/**
|
||||
* Description of the Class
|
||||
|
@ -167,13 +225,8 @@ public class SWF2HTML extends SWFTagTypesImpl {
|
|||
*@created 15 de Setembro de 2002
|
||||
*/
|
||||
public class TextDumper implements SWFText {
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
|
||||
protected Integer fontId;
|
||||
/**
|
||||
* Description of the Field
|
||||
*/
|
||||
protected boolean firstY = true;
|
||||
|
||||
|
||||
|
@ -285,23 +338,26 @@ public class SWF2HTML extends SWFTagTypesImpl {
|
|||
|
||||
|
||||
/**
|
||||
* Arguments are: 0. Name of input SWF
|
||||
* Parses swf input and extracts text and wrap it as html
|
||||
*
|
||||
*@param in Description of the Parameter
|
||||
*@return Description of the Return Value
|
||||
*@exception Exception Description of the Exception
|
||||
* @param in SWF inputstream
|
||||
* @return html of text in swf
|
||||
* @exception Exception Description of the Exception
|
||||
*/
|
||||
public String convertSWFToHTML(InputStream in) throws Exception {
|
||||
StringWriter out1 = new StringWriter();
|
||||
output = new PrintWriter(out1);
|
||||
output.println("<html><body>");
|
||||
TagParser parser = new TagParser(this);
|
||||
SWFReader reader = new SWFReader(parser, in);
|
||||
reader.readFile();
|
||||
in.close();
|
||||
output.println("</body></html>");
|
||||
sizeCount = reader.size;
|
||||
return out1.toString();
|
||||
// generate html output string
|
||||
final String ret = "<html>"
|
||||
+ (headerstr.isEmpty() ? "<body>" : "<header>" + headerstr + "</header><body>")
|
||||
+ out1.toString()
|
||||
+ "</body></html>";
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -425,4 +425,14 @@ public interface SWFTagTypes extends SWFSpriteTagTypes {
|
|||
*@exception IOException Description of the Exception
|
||||
*/
|
||||
public void tagGeneratorFont(byte[] data) throws IOException;
|
||||
|
||||
/**
 * Receives the content of a METADATA tag: metadata such as the title,
 * passed as an XML string. The format of the metadata is RDF that is
 * compliant with Adobe's Extensible Metadata Platform (XMP) specification.
 *
 * @param data xml data as string
 * @throws IOException if an I/O error occurs while handling the tag
 */
|
||||
public void tagMetaData(String data) throws IOException;
|
||||
}
|
||||
|
|
|
@ -881,4 +881,20 @@ public class SWFTagTypesImpl implements SWFTagTypes {
|
|||
colors, imageData);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* SWFTagTypes METADATA
|
||||
* Metadata such as title in xml format
|
||||
* The format of the metadata is RDF that is compliant with Adobe’s
|
||||
* Extensible Metadata Platform (XMP™) specification.
|
||||
*
|
||||
* @param data xml data as string
|
||||
* @throws IOException
|
||||
*/
|
||||
@Override
|
||||
public void tagMetaData (String xml) throws IOException {
|
||||
if (tags != null) {
|
||||
tags.tagMetaData(xml);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -200,11 +200,14 @@ public class TagParser implements SWFTags, SWFConstants {
|
|||
parseDefineBits(in);
|
||||
break;
|
||||
case TAG_JPEGTABLES:
|
||||
//parseDefineJPEGTables(in); // TODO: content length=0 (in==null) occurs for unknown reason - find out!
|
||||
if (in != null) parseDefineJPEGTables(in); // TODO: content length=0 (in==null) occurs for unknown reason - find out!
|
||||
break;
|
||||
case TAG_DEFINEBITSJPEG3:
|
||||
parseDefineBitsJPEG3(in);
|
||||
break;
|
||||
case TAG_METADATA:
|
||||
if (in != null) parseMetaData(in);
|
||||
break;
|
||||
default:
|
||||
//--Unknown Tag Type
|
||||
tagtypes.tag(tagType, longTag, contents);
|
||||
|
@ -366,6 +369,15 @@ public class TagParser implements SWFTags, SWFConstants {
|
|||
tagtypes.tagDefineBitsJPEG3(id, imageData, alphaData);
|
||||
}
|
||||
|
||||
/**
|
||||
* parse METADATA tag (TAG_METADATA = 77)
|
||||
* @param in
|
||||
* @throws IOException
|
||||
*/
|
||||
protected void parseMetaData(InStream in) throws IOException {
|
||||
String xmlMetaData = in.readString();
|
||||
tagtypes.tagMetaData(xmlMetaData);
|
||||
}
|
||||
|
||||
/**
|
||||
* Description of the Method
|
||||
|
|
|
@ -1203,6 +1203,12 @@ public class TagWriter implements SWFTagTypes, SWFConstants {
|
|||
}
|
||||
|
||||
|
||||
/**
 * Writes a METADATA tag whose content is the given XML string: the tag is
 * started, the string is written to {@code out}, and the tag is completed.
 *
 * @param xml metadata as XML string
 * @throws IOException if writing fails
 */
@Override
|
||||
public void tagMetaData (String xml) throws IOException {
|
||||
startTag(TAG_METADATA, true);
|
||||
out.writeString(xml);
|
||||
|
||||
completeTag();
|
||||
}
|
||||
//-----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
/**
|
||||
* Channels
|
||||
* Copyright 2010 by Michael Peter Christen
|
||||
* First released 10.5.2010 at http://yacy.net
|
||||
*
|
||||
* $LastChangedDate$
|
||||
* $LastChangedRevision$
|
||||
* $LastChangedBy$
|
||||
*
|
||||
* This file is part of YaCy Content Integration
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program in the file lgpl21.txt
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package net.yacy.cora.document.feed;
|
||||
|
||||
/**
 * Empty placeholder class: it declares no fields, constructors or methods.
 */
public class Channels {
|
||||
|
||||
}
|
|
@ -66,7 +66,7 @@ import net.yacy.kelondro.util.ISO639;
|
|||
public class ContentScraper extends AbstractScraper implements Scraper {
|
||||
|
||||
private final static int MAX_TAGSIZE = 1024 * 1024;
|
||||
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
|
||||
public static final int MAX_DOCSIZE = 40 * 1024 * 1024;
|
||||
|
||||
private final char degree = '\u00B0';
|
||||
private final char[] minuteCharsHTML = "'".toCharArray();
|
||||
|
@ -389,16 +389,17 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|||
// itemprop
|
||||
String itemprop = tag.opts.getProperty("itemprop");
|
||||
if (itemprop != null) {
|
||||
String content = tag.opts.getProperty("content");
|
||||
if (content != null) {
|
||||
String propval = tag.opts.getProperty("content");
|
||||
if (propval == null) propval = tag.opts.getProperty("datetime"); // html5 example: <time itemprop="startDate" datetime="2016-01-26">today</time> while each prop is optional
|
||||
if (propval != null) {
|
||||
if ("startDate".equals(itemprop)) try {
|
||||
// parse ISO 8601 date
|
||||
Date startDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
|
||||
Date startDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
|
||||
this.startDates.add(startDate);
|
||||
} catch (ParseException e) {}
|
||||
if ("endDate".equals(itemprop)) try {
|
||||
// parse ISO 8601 date
|
||||
Date endDate = ISO8601Formatter.FORMATTER.parse(content, this.timezoneOffset).getTime();
|
||||
Date endDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
|
||||
this.endDates.add(endDate);
|
||||
} catch (ParseException e) {}
|
||||
}
|
||||
|
@ -1096,10 +1097,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
|
|||
this.embeds.clear();
|
||||
this.images.clear();
|
||||
this.metas.clear();
|
||||
this.hreflang.clear();
|
||||
this.navigation.clear();
|
||||
this.titles.clear();
|
||||
this.articles.clear();
|
||||
this.startDates.clear();
|
||||
this.endDates.clear();
|
||||
this.headlines = null;
|
||||
this.bold.clear();
|
||||
this.italic.clear();
|
||||
this.underline.clear();
|
||||
this.li.clear();
|
||||
this.dt.clear();
|
||||
this.dd.clear();
|
||||
this.content.clear();
|
||||
this.root = null;
|
||||
}
|
||||
|
|
|
@ -1486,7 +1486,9 @@ public final class Switchboard extends serverSwitch {
|
|||
|
||||
/**
|
||||
* Initialize and perform all settings to enable remote crawls
|
||||
* (if remote crawl is not in use, save the resources)
|
||||
* (if remote crawl is not in use, save the resources) If called with
|
||||
* activate==false worker threads are closed and removed (to free resources)
|
||||
*
|
||||
* @param activate true=enable, false=disable
|
||||
*/
|
||||
public void initRemoteCrawler(final boolean activate) {
|
||||
|
@ -1536,6 +1538,9 @@ public final class Switchboard extends serverSwitch {
|
|||
}
|
||||
rcl.setBusySleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_BUSYSLEEP, 1000));
|
||||
rcl.setIdleSleep(getConfigLong(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, 10000));
|
||||
} else { // activate==false, terminate and remove threads
|
||||
terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER, true);
|
||||
terminateThread(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3433,6 +3438,10 @@ public final class Switchboard extends serverSwitch {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Not implemented: every call throws {@link UnsupportedOperationException}.
 * NOTE(review): this looks like an IDE-generated stub - confirm whether it
 * should be implemented or removed before anything calls it.
 *
 * @param b not used by the current body
 * @throws UnsupportedOperationException always
 */
public void initBookmarks(boolean b) {
|
||||
throw new UnsupportedOperationException("Not supported yet."); // IDE-generated placeholder body, never implemented
|
||||
}
|
||||
|
||||
public class receiptSending implements Runnable
|
||||
{
|
||||
private final Seed initiatorPeer;
|
||||
|
|
Loading…
Reference in New Issue
Block a user