diff --git a/git-filter-repo b/git-filter-repo
index 5420acd..399a921 100755
--- a/git-filter-repo
+++ b/git-filter-repo
@@ -119,6 +119,33 @@ def decode(bytestr):
   'Try to convert bytestr to utf-8 for outputting as an error message.'
   return bytestr.decode('utf-8', 'backslashreplace')
 
+def glob_to_regex(glob_bytestr):
+  'Translate the glob glob_bytestr into a regex operating on bytestrings.'
+
+  # fnmatch.translate only accepts str, so the glob must round-trip via utf-8
+  if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover
+    raise SystemExit(_("Error: Cannot handle glob %s") % decode(glob_bytestr))
+
+  # Create regex operating on string
+  regex = fnmatch.translate(decode(glob_bytestr))
+
+  # FIXME: This is an ugly hack...
+  # fnmatch.translate tries to do multi-line matching and wants the glob to
+  # match up to the end of the input, which isn't relevant for us, so we
+  # have to modify the regex.  fnmatch.translate has used different regex
+  # constructs to achieve this with different python versions -- a trailing
+  # \Z(?ms), then a (?s:...)\Z wrapper, then (?s:...)\z in python 3.14 -- so
+  # we have to check for each of them and strip it.  It would be much better
+  # if fnmatch.translate could just take some flags to allow us to specify
+  # what we want rather than employing this hackery, but since it doesn't...
+  if regex.endswith(r'\Z(?ms)'): # pragma: no cover
+    regex = regex[0:-7]
+  elif regex.startswith(r'(?s:') and regex.endswith((r')\Z', r')\z')): # pragma: no cover
+    regex = regex[4:-3]
+
+  # Finally, convert back to regex operating on bytestr
+  return regex.encode()
+
 class PathQuoting:
   _unescape = {'a': '\a',
                'b': '\b',
@@ -2037,9 +2064,7 @@ class FilteringOptions(object):
         if line.startswith('regex:'):
           regex = line[6:]
         elif line.startswith('glob:'):
-          regex = fnmatch.translate(line[5:])
-          if regex.endswith(r'\Z(?ms)'):
-            regex = regex[0:-7]
+          regex = glob_to_regex(line[5:])
         if regex:
           replace_regexes.append((re.compile(regex), replacement))
         else: