mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
*) Bugfix for "download of non supported file content" via crawler
See: http://www.yacy-forum.de/viewtopic.php?p=10724#10724
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@835 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
2b3f964037
commit
28c5687ff9
|
@@ -541,9 +541,11 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
|
|||
);
|
||||
|
||||
// handle file types and make (possibly transforming) output stream
|
||||
if ((!(transformer.isIdentityTransformer())) &&
|
||||
((ext == null) || (!(plasmaParser.mediaExtContains(ext)))) &&
|
||||
(plasmaParser.realtimeParsableMimeTypesContains(res.responseHeader.mime()))) {
|
||||
if (
|
||||
(!transformer.isIdentityTransformer()) &&
|
||||
(plasmaParser.supportedFileExt(url)) &&
|
||||
(plasmaParser.realtimeParsableMimeTypesContains(res.responseHeader.mime()))
|
||||
) {
|
||||
// make a transformer
|
||||
this.theLogger.logFine("create transformer for URL " + url);
|
||||
hfos = new htmlFilterOutputStream((gzippedOut != null) ? gzippedOut : ((chunkedOut != null)? chunkedOut : respond), null, transformer, (ext.length() == 0));
|
||||
|
|
|
@@ -355,12 +355,10 @@ public final class plasmaCrawlWorker extends Thread {
|
|||
File cacheFile = cacheManager.getCachePath(url);
|
||||
try {
|
||||
String error = null;
|
||||
if ((!(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime()))) &&
|
||||
(!(plasmaParser.supportedFileExt(url)))) {
|
||||
// if the response has not the right file type then reject file
|
||||
remote.close();
|
||||
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString());
|
||||
} else {
|
||||
if (
|
||||
(plasmaParser.supportedMimeTypesContains(res.responseHeader.mime())) &&
|
||||
(plasmaParser.supportedFileExt(url))
|
||||
) {
|
||||
if (cacheFile.isFile()) {
|
||||
cacheManager.deleteFile(url);
|
||||
}
|
||||
|
@@ -376,6 +374,10 @@ public final class plasmaCrawlWorker extends Thread {
|
|||
} finally {
|
||||
if (fos!=null)try{fos.close();}catch(Exception e){}
|
||||
}
|
||||
} else {
|
||||
// if the response has not the right file type then reject file
|
||||
remote.close();
|
||||
log.logInfo("REJECTED WRONG MIME/EXT TYPE " + res.responseHeader.mime() + " for URL " + url.toString());
|
||||
}
|
||||
// enQueue new entry with response header
|
||||
if (profile != null) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user