* files distributed by YaCy are UTF-8; files from the repository use the system default charset

* fixes http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1564#p11092
  and http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1550


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5345 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
f1ori 2008-11-14 20:49:16 +00:00
parent 8c96bc2ac1
commit d49ffcd818

View File

@ -696,17 +696,24 @@ public final class httpdFileHandler {
fis = new BufferedInputStream(new FileInputStream(targetFile));
}
// detect charset of html-files
if(path.endsWith("html") || path.endsWith("htm")) {
// save position
fis.mark(1000);
// scrape document to look up charset
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset());
// reset position
fis.reset();
if(charset != null)
mimeType = mimeType + "; charset="+charset;
if(mimeType.startsWith("text")) {
// every text-file distributed by yacy is UTF-8
if(!path.startsWith("/repository")) {
mimeType = mimeType + "; charset=UTF-8";
} else {
// detect charset of html-files
if((path.endsWith("html") || path.endsWith("htm"))) {
// save position
fis.mark(1000);
// scrape document to look up charset
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset());
if(charset != null)
mimeType = mimeType + "; charset="+charset;
// reset position
fis.reset();
}
}
}
// write the array to the client