mirror of
https://github.com/yacy/yacy_search_server.git
synced 2024-09-19 00:01:41 +02:00
* Files distributed by YaCy are UTF-8; files from the repository use the system default charset
* fixes http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1564#p11092 and http://forum.yacy-websuche.de/viewtopic.php?f=6&t=1550

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5345 6c8d7289-2bf4-0310-a012-ef5d649a1542
This commit is contained in:
parent
8c96bc2ac1
commit
d49ffcd818
|
@ -696,17 +696,24 @@ public final class httpdFileHandler {
|
|||
fis = new BufferedInputStream(new FileInputStream(targetFile));
|
||||
}
|
||||
|
||||
// detect charset of html-files
|
||||
if(path.endsWith("html") || path.endsWith("htm")) {
|
||||
// save position
|
||||
fis.mark(1000);
|
||||
// scrape document to look up charset
|
||||
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
|
||||
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset());
|
||||
// reset position
|
||||
fis.reset();
|
||||
if(charset != null)
|
||||
mimeType = mimeType + "; charset="+charset;
|
||||
if(mimeType.startsWith("text")) {
|
||||
// every text-file distributed by yacy is UTF-8
|
||||
if(!path.startsWith("/repository")) {
|
||||
mimeType = mimeType + "; charset=UTF-8";
|
||||
} else {
|
||||
// detect charset of html-files
|
||||
if((path.endsWith("html") || path.endsWith("htm"))) {
|
||||
// save position
|
||||
fis.mark(1000);
|
||||
// scrape document to look up charset
|
||||
final htmlFilterInputStream htmlFilter = new htmlFilterInputStream(fis,"UTF-8",new yacyURL("http://localhost", null),null,false);
|
||||
final String charset = plasmaParser.patchCharsetEncoding(htmlFilter.detectCharset());
|
||||
if(charset != null)
|
||||
mimeType = mimeType + "; charset="+charset;
|
||||
// reset position
|
||||
fis.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write the array to the client
|
||||
|
|
Loading…
Reference in New Issue
Block a user