Close the InputStream after reading the input file (Vocabulary_p servlet and FileUtils.detectCharset)

This commit is contained in:
reger 2016-05-24 00:26:28 +02:00
parent 2422626975
commit 4cc38e979d
2 changed files with 54 additions and 55 deletions

View File

@ -101,45 +101,46 @@ public class Vocabulary_p {
discoverFromCSVCharset = charsets.get(0);
ConcurrentLog.info("FileUtils", "detected charset: " + discoverFromCSVCharset + " used to read " + discoverFromCSVFile.toString());
}
// read file
BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(discoverFromCSVFile), discoverFromCSVCharset));
String line = null;
Pattern semicolon = Pattern.compile(";");
Map<String, String> synonym2literal = new HashMap<>(); // helper map to check if there are double synonyms
while ((line = r.readLine()) != null) {
if (line.length() == 0) continue;
String[] l = semicolon.split(line);
if (l.length == 0) l = new String[]{line};
String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim();
if (literal == null) continue;
literal = normalizeLiteral(literal);
String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim();
if (literal.length() > 0) {
String synonyms = "";
if (discoverenrichsynonyms) {
Set<String> sy = SynonymLibrary.getSynonyms(literal);
if (sy != null) {
for (String s: sy) synonyms += "," + s;
// read file (try-with-resource to close inputstream automatically)
try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(discoverFromCSVFile), discoverFromCSVCharset))) {
String line = null;
Pattern semicolon = Pattern.compile(";");
Map<String, String> synonym2literal = new HashMap<>(); // helper map to check if there are double synonyms
while ((line = r.readLine()) != null) {
if (line.length() == 0) continue;
String[] l = semicolon.split(line);
if (l.length == 0) l = new String[]{line};
String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim();
if (literal == null) continue;
literal = normalizeLiteral(literal);
String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim();
if (literal.length() > 0) {
String synonyms = "";
if (discoverenrichsynonyms) {
Set<String> sy = SynonymLibrary.getSynonyms(literal);
if (sy != null) {
for (String s: sy) synonyms += "," + s;
}
} else if (discoverreadcolumn) {
synonyms = discovercolumnsynonyms < 0 || l.length <= discovercolumnsynonyms ? null : l[discovercolumnsynonyms].trim();
synonyms = normalizeLiteral(synonyms);
} else {
synonyms = Tagging.normalizeTerm(literal);
}
} else if (discoverreadcolumn) {
synonyms = discovercolumnsynonyms < 0 || l.length <= discovercolumnsynonyms ? null : l[discovercolumnsynonyms].trim();
synonyms = normalizeLiteral(synonyms);
} else {
synonyms = Tagging.normalizeTerm(literal);
}
// check double synonyms
if (synonyms.length() > 0) {
String oldliteral = synonym2literal.get(synonyms);
if (oldliteral != null && !literal.equals(oldliteral)) {
// replace old entry with combined new
table.remove(oldliteral);
String newliteral = oldliteral + "," + literal;
literal = newliteral;
// check double synonyms
if (synonyms.length() > 0) {
String oldliteral = synonym2literal.get(synonyms);
if (oldliteral != null && !literal.equals(oldliteral)) {
// replace old entry with combined new
table.remove(oldliteral);
String newliteral = oldliteral + "," + literal;
literal = newliteral;
}
synonym2literal.put(synonyms, literal);
}
synonym2literal.put(synonyms, literal);
// store term
table.put(literal, new Tagging.SOTuple(synonyms, objectlink == null ? "" : objectlink));
}
// store term
table.put(literal, new Tagging.SOTuple(synonyms, objectlink == null ? "" : objectlink));
}
}
} else {

View File

@ -933,26 +933,24 @@ public final class FileUtils {
*/
public static List<String> detectCharset(File file) throws IOException {
// auto-detect charset, used code from http://jchardet.sourceforge.net/; see also: http://www-archive.mozilla.org/projects/intl/chardet.html
nsDetector det = new nsDetector(nsPSMDetector.ALL);
BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file));
byte[] buf = new byte[1024] ;
int len;
boolean done = false ;
boolean isAscii = true ;
while ((len = imp.read(buf,0,buf.length)) != -1) {
if (isAscii) isAscii = det.isAscii(buf,len);
if (!isAscii && !done) done = det.DoIt(buf,len, false);
List<String> result;
try (BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file))) { // try-with-resource to close inputstream
nsDetector det = new nsDetector(nsPSMDetector.ALL);
byte[] buf = new byte[1024] ;
int len;
boolean done = false ;
boolean isAscii = true ;
while ((len = imp.read(buf,0,buf.length)) != -1) {
if (isAscii) isAscii = det.isAscii(buf,len);
if (!isAscii && !done) done = det.DoIt(buf,len, false);
} det.DataEnd();
result = new ArrayList<>();
if (isAscii) {
result.add(StandardCharsets.US_ASCII.name());
} else {
for (String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
}
}
det.DataEnd();
List<String> result = new ArrayList<>();
if (isAscii) {
result.add(StandardCharsets.US_ASCII.name());
} else {
for (String c: det.getProbableCharsets()) result.add(c); // worst case this returns "nomatch"
}
return result;
}