mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-22 00:00:59 +02:00
b990dc1ad1
*) Replacing PDFBox 0.7.1 lib with newer version 0.7.2 *) Refactoring of classes httpd/httpc/httpHeaders to make many methods for httpHeader/Requestline parsing reusable for new icap implementation *) adding chunked input stream support - needed by new icap implementation - needed by future httpc HTTP/1.1 support *) httpd.java - moving all connection property constants to class httpHeader - moving readHeader function to class httpHeader - moving parseQuery function to class httpHeader - moving handleTransparentProxy function to class httpHeader *) httpHeader.java - adding new function to parse the http response line - adding new function to convert http headers to a string that can be sent to the client - adding a function that generates a proper url using all parsed connection properties *) ICAP Support - yacy now supports handling of icap response modification requests - this feature can be used by other icap enabled proxies to contact yacy as icap server, and to hand over the downloaded content to yacy.logging for indexing - functionality was successfully tested with squid 2.5Stable 10 + icap patch - further icap services e.g. URL filtering based on yacy's blacklists are possible *) plasmaSwitchboard.java - htcache entries that are still needed for indexing are now properly registered as in use after system restart - extended logging: log message now shows parsing and indexing time for each sb. entry git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@757 6c8d7289-2bf4-0310-a012-ef5d649a1542
58 lines
2.3 KiB
XML
58 lines
2.3 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Ant build file for the YaCy pdf content parser.

    The properties src, build, libx, release, releaseDir, javacSource and
    javacTarget are referenced below but are not defined in this file; they
    have to be supplied from outside (presumably by the build that invokes
    this file; verify against the caller).
-->
<project name="YACY - pdfParser" default="dist">
    <description>
        A class to parse pdf documents (application/pdf)
    </description>

    <!-- Identity of this parser; used to derive source/class paths and the archive name. -->
    <property name="parserShortName" value="pdf"/>
    <property name="parserVersion" value="0.1"/>

    <property name="parserLongName" value="yacyContentParser_${parserShortName}"/>
    <property name="parserArchive" location="${release}/${parserLongName}_${parserVersion}.tgz"/>

    <!--
        Single place to change when the bundled PDFBox library is upgraded.
        pdfboxLibName is the file name stem shared by every PDFBox file in
        the libx directory that the targets below reference.
    -->
    <property name="pdfboxVersion" value="0.7.2"/>
    <property name="pdfboxLibName" value="PDFBox-${pdfboxVersion}"/>

    <!-- Compiles the parser sources into the build directory. -->
    <target name="compile">
        <javac srcdir="${src}/de/anomic/plasma/parser/${parserShortName}"
               destdir="${build}"
               source="${javacSource}"
               target="${javacTarget}"
               debug="true"
               debuglevel="lines,vars,source">
            <classpath>
                <pathelement location="${build}" />

                <!-- main lib needed to parse pdf files -->
                <pathelement location="${libx}/${pdfboxLibName}.jar" />
            </classpath>
        </javac>
    </target>

    <!--
        Packs the PDFBox library files, the parser sources and the compiled
        parser classes into the gzip-compressed tar archive ${parserArchive},
        each under its own sub-path of ${releaseDir}.
    -->
    <target name="zip" depends="compile">
        <tar destfile="${parserArchive}" compression="gzip">
            <tarfileset dir="${libx}"
                        includes="${pdfboxLibName}.*"
                        prefix="${releaseDir}/libx/"
                        dirmode="755" mode="644"/>
            <tarfileset dir="${src}/de/anomic/plasma/parser/${parserShortName}"
                        prefix="${releaseDir}/source/de/anomic/plasma/parser/${parserShortName}"
                        dirmode="755" mode="644"/>
            <tarfileset dir="${build}/de/anomic/plasma/parser/${parserShortName}"
                        prefix="${releaseDir}/classes/de/anomic/plasma/parser/${parserShortName}"
                        dirmode="755" mode="644"/>
        </tar>
    </target>

    <!--
        Copies the same three file groups as the zip target (library files,
        sources, compiled classes) unpacked into the ${release} directory.
    -->
    <target name="copy" depends="compile">
        <copy todir="${release}/libx/">
            <fileset dir="${libx}" includes="${pdfboxLibName}.*"/>
        </copy>
        <copy todir="${release}/source/de/anomic/plasma/parser/${parserShortName}">
            <fileset dir="${src}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/>
        </copy>
        <copy todir="${release}/classes/de/anomic/plasma/parser/${parserShortName}">
            <fileset dir="${build}/de/anomic/plasma/parser/${parserShortName}" includes="**/*"/>
        </copy>
    </target>

    <!-- Default target: build the classes and produce the release archive. -->
    <target name="dist" depends="compile,zip" description="Compile and zip the parser"/>

</project>
|
|
|