From 8405f87edb6a1fb9346409daaf5aee39b565355f Mon Sep 17 00:00:00 2001 From: funilrys Date: Sun, 4 Mar 2018 11:17:38 +0100 Subject: [PATCH] Review of domain_to_idna() to support more test cases Please note that this patch follows an issue reported by @FadeMind, which I could reproduce on 1 of 3 computers. --- updateHostsFile.py | 53 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/updateHostsFile.py b/updateHostsFile.py index d7c41b65f..33ea85958 100644 --- a/updateHostsFile.py +++ b/updateHostsFile.py @@ -1159,26 +1159,47 @@ def domain_to_idna(line): """ if not line.startswith('#'): - for separator in ['\t', ' ']: - comment = '' + tabs = '\t' + space = ' ' - if separator in line: - splited_line = line.split(separator) - if '#' in splited_line[1]: - index_comment = splited_line[1].find('#') + tabs_position, space_position = (line.find(tabs), line.find(space)) - if index_comment > -1: - comment = splited_line[1][index_comment:] + if tabs_position > -1 and space_position > -1: + if space_position < tabs_position: + separator = space + else: + separator = tabs + elif not tabs_position == -1: + separator = tabs + elif not space_position == -1: + separator = space + else: + separator = '' - splited_line[1] = splited_line[1] \ - .split(comment)[0] \ - .encode("IDNA").decode("UTF-8") + \ - comment + if separator: + splited_line = line.split(separator) - splited_line[1] = splited_line[1] \ - .encode("IDNA") \ - .decode("UTF-8") - return separator.join(splited_line) + index = 1 + while index < len(splited_line): + if splited_line[index]: + break + index += 1 + + if '#' in splited_line[index]: + index_comment = splited_line[index].find('#') + + if index_comment > -1: + comment = splited_line[index][index_comment:] + + splited_line[index] = splited_line[index] \ + .split(comment)[0] \ + .encode("IDNA").decode("UTF-8") + \ + comment + + splited_line[index] = splited_line[index] \ + .encode("IDNA") \ + .decode("UTF-8") +
return separator.join(splited_line) return line.encode("IDNA").decode("UTF-8") return line.encode("UTF-8").decode("UTF-8")