*) Bugfix for "download of non-supported file content" via crawler

See: http://www.yacy-forum.de/viewtopic.php?p=10724#10724

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@835 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
theli 2005-10-03 08:45:39 +00:00
parent 2b3f964037
commit 28c5687ff9
2 changed files with 13 additions and 9 deletions

View File

@@ -541,9 +541,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
);
// handle file types and make (possibly transforming) output stream
if ((!(transformer.isIdentityTransformer())) &&
((ext == null) || (!(plasmaParser.mediaExtContains(ext)))) &&
(plasmaParser.realtimeParsableMimeTypesContains(res.responseHeader.mime()))) {
if (
(!transformer.isIdentityTransformer()) &&
(plasmaParser.supportedFileExt(url)) &&
(plasmaParser.realtimeParsableMimeTypesContains(res.responseHeader.mime()))
) {
// make a transformer
this.theLogger.logFine("create transformer for URL " + url);
hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0));

View File

@@ -355,12 +355,10 @@ public final class plasmaCrawlWorker extends Thread {
File cacheFile = cacheManager.getCachePath(url);
try {
String error = null;
if ((!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) &&
(!(plasmaParser.supportedFileExt(url)))) {
// if the response has not the right file type then reject file
remote.close();
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString());
} else {
if (
(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime())) &&
(plasmaParser.supportedFileExt(url))
) {
if (cacheFile.isFile()) {
cacheManager.deleteFile(url);
}
@@ -376,6 +374,10 @@ public final class plasmaCrawlWorker extends Thread {
} finally {
if (fos!=null)try{fos.close();}catch(Exception e){}
}
} else {
// if the response has not the right file type then reject file
remote.close();
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString());
}
// enQueue new entry with response header
if (profile != null) {