def glob_to_regex(glob_bytestr):
    """Translate glob_bytestr into a regex on bytestrings.

    fnmatch.translate() only accepts str, so the glob is decoded, translated,
    stripped of the whole-input anchoring that fnmatch adds, and encoded back
    to bytes.

    Args:
      glob_bytestr: the glob pattern as bytes.  It must survive a
        decode()/encode() round trip unchanged.

    Returns:
      The equivalent regular expression as bytes, without the trailing
      end-of-input anchor (and without the flag wrapper) that
      fnmatch.translate() adds.

    Raises:
      SystemExit: if the glob does not survive the decode()/encode() round
        trip, meaning it cannot be represented as a str for fnmatch.
    """
    # fnmatch.translate won't accept bytestrings, so work on the decoded
    # form; refuse globs whose decoded form does not encode back to the
    # original bytes, because the resulting regex would match other input.
    glob_str = decode(glob_bytestr)
    if glob_str.encode() != glob_bytestr: # pragma: no cover
        # The message uses a %-style placeholder, so interpolate with %
        # (str.format would leave the literal "%s" in the output).
        raise SystemExit(_("Error: Cannot handle glob %s") % glob_str)

    # Create regex operating on string
    regex = fnmatch.translate(glob_str)

    # FIXME: This is an ugly hack...
    # fnmatch.translate tries to do multi-line matching and wants the glob to
    # match up to the end of the input, which isn't relevant for us, so we
    # have to modify the regex.  fnmatch.translate has used different regex
    # constructs to achieve this with different python versions, so we have
    # to check for each of them and then fix it up.  It would be much better
    # if fnmatch.translate could just take some flags to allow us to specify
    # what we want rather than employing this hackery, but since it
    # doesn't...
    old_suffix = r'\Z(?ms)'
    if regex.endswith(old_suffix): # pragma: no cover
        regex = regex[:-len(old_suffix)]
    elif (regex.startswith(r'(?s:') and
          regex.endswith((r')\Z', r')\z'))): # pragma: no cover
        # Accept the lowercase \z spelling of the end anchor as well as \Z;
        # both suffixes are three characters long.
        regex = regex[4:-3]

    # Finally, convert back to regex operating on bytestr
    return regex.encode()