diff --git a/git-filter-repo b/git-filter-repo
index 07c769c..e66e27e 100755
--- a/git-filter-repo
+++ b/git-filter-repo
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 git-filter-repo filters git repositories, similar to git filter-branch, BFG
@@ -30,8 +30,6 @@ operations; however:
   ***** END API BACKWARD COMPATIBILITY CAVEAT *****
 """
 
-from __future__ import print_function
-
 import argparse
 import collections
 import fnmatch
@@ -39,7 +37,7 @@ import gettext
 import os
 import re
 import shutil
-import StringIO
+import io
 import subprocess
 import sys
 import time
@@ -89,12 +87,12 @@ class FixedTimeZone(tzinfo):
   Fixed offset in minutes east from UTC.
   """
 
-  tz_re = re.compile(r'^([-+]?)(\d\d)(\d\d)$')
+  tz_re = re.compile(br'^([-+]?)(\d\d)(\d\d)$')
 
   def __init__(self, offset_string):
     tzinfo.__init__(self)
     sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups()
-    factor = -1 if (sign and sign == '-') else 1
+    factor = -1 if (sign and sign == b'-') else 1
     self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm)))
     self._offset_string = offset_string
 
@@ -114,35 +112,67 @@ def string_to_date(datestring):
 
 def date_to_string(dateobj):
   epoch = datetime.fromtimestamp(0, dateobj.tzinfo)
-  return('{} {}'.format(int(_timedelta_to_seconds(dateobj - epoch)),
-                        dateobj.tzinfo.tzname(0)))
+  return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)),
+                     dateobj.tzinfo.tzname(0)))
+
+def decode(bytestr):
+  'Try to convert bytestr to utf-8 for outputting as an error message.'
+  return bytestr.decode('utf-8', 'backslashreplace')
+
+def glob_to_regex(glob_bytestr):
+  'Translate glob_bytestr into a regex on bytestrings'
+
+  # fnmatch.translate is idiotic and won't accept bytestrings
+  if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover
+    raise SystemExit(_("Error: Cannot handle glob %s") % decode(glob_bytestr))
+
+  # Create regex operating on string
+  regex = fnmatch.translate(decode(glob_bytestr))
+
+  # FIXME: This is an ugly hack...
+  # fnmatch.translate tries to do multi-line matching and wants the glob to
+  # match up to the end of the input, which isn't relevant for us, so we
+  # have to modify the regex.  fnmatch.translate has used different regex
+  # constructs to achieve this with different python versions, so we have
+  # to check for each of them and then fix it up.  It would be much better
+  # if fnmatch.translate could just take some flags to allow us to specify
+  # what we want rather than employing this hackery, but since it
+  # doesn't...
+ if regex.endswith(r'\Z(?ms)'): # pragma: no cover + regex = regex[0:-7] + elif regex.startswith(r'(?s:') and regex.endswith(r')\Z'): # pragma: no cover + regex = regex[4:-3] + + # Finally, convert back to regex operating on bytestr + return regex.encode() class PathQuoting: - _unescape = {'a': '\a', - 'b': '\b', - 'f': '\f', - 'n': '\n', - 'r': '\r', - 't': '\t', - 'v': '\v', - '"': '"', - '\\':'\\'} - _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') - _escape = [chr(x) for x in xrange(127)]+['\\'+oct(x)[1:] for x in xrange(127,256)] + _unescape = {b'a': b'\a', + b'b': b'\b', + b'f': b'\f', + b'n': b'\n', + b'r': b'\r', + b't': b'\t', + b'v': b'\v', + b'"': b'"', + b'\\':b'\\'} + _unescape_re = re.compile(br'\\([a-z"\\]|[0-9]{3})') + _escape = [bytes([x]) for x in range(127)]+[ + b'\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: - _escape[ord(x)] = '\\'+_reverse[x] + _escape[ord(x)] = b'\\'+_reverse[x] _special_chars = [len(x) > 1 for x in _escape] @staticmethod def unescape_sequence(orig): seq = orig.group(1) - return PathQuoting._unescape[seq] if len(seq) == 1 else chr(int(seq, 8)) + return PathQuoting._unescape[seq] if len(seq) == 1 else bytes([int(seq, 8)]) @staticmethod def dequote(quoted_string): - if quoted_string.startswith('"'): - assert quoted_string.endswith('"') + if quoted_string.startswith(b'"'): + assert quoted_string.endswith(b'"') return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence, quoted_string[1:-1]) return quoted_string @@ -151,11 +181,11 @@ class PathQuoting: def enquote(unquoted_string): # Option 1: Quoting when fast-export would: # pqsc = PathQuoting._special_chars - # if any(pqsc[ord(x)] for x in set(unquoted_string)): + # if any(pqsc[x] for x in set(unquoted_string)): # Option 2, perf hack: do minimal amount of quoting required by fast-import - if unquoted_string.startswith('"') or '\n' in unquoted_string: + if unquoted_string.startswith(b'"') or b'\n' in unquoted_string: pqe = PathQuoting._escape - return '"' + ''.join(pqe[ord(x)] for x in unquoted_string) + '"' + return b'"' + b''.join(pqe[x] for x in unquoted_string) + b'"' return unquoted_string class AncestryGraph(object): @@ -233,17 +263,17 @@ class MailmapInfo(object): self._parse_file(filename) def _parse_file(self, filename): - name_and_email_re = re.compile(r'(.*?)\s*<([^>]+)>\s*') - comment_re = re.compile(r'\s*#.*') + name_and_email_re = re.compile(br'(.*?)\s*<([^>]+)>\s*') + comment_re = re.compile(br'\s*#.*') if not os.access(filename, os.R_OK): - raise SystemExit(_("Cannot read %s") % filename) - with open(filename) as f: + raise SystemExit(_("Cannot read %s") % decode(filename)) + with open(filename, 'br') as f: count = 0 for line in f: count += 1 err = "Unparseable mailmap file: line #{} is bad: {}".format(count, line) # Remove comments - line = comment_re.sub('', line) + line = comment_re.sub(b'', line) # Remove leading and trailing whitespace line = line.strip() if not line: @@ -270,7 +300,7 @@ class MailmapInfo(object): ''' Given a name and email, return the expected new name and email from the mailmap if there is a translation rule for it, otherwise just return the given name and email.''' - for old, new in self.changes.iteritems(): + for old, new in self.changes.items(): old_name, old_email = old new_name, new_email = new if (email == old_email or not old_email) and ( @@ -403,17 +433,18 @@ class _GitElement(object): raise SystemExit(_("Unimplemented function: %s") % type(self).__name__ 
+".dump()") # pragma: no cover - def __str__(self): + def __bytes__(self): """ Convert GitElement to string; used for debugging """ old_dumped = self.dumped - writeme = StringIO.StringIO() + writeme = io.BytesIO() self.dump(writeme) output_lines = writeme.getvalue().splitlines() writeme.close() self.dumped = old_dumped - return "{}:\n {}".format(type(self).__name__, "\n ".join(output_lines)) + return b"%s:\n %s" % (type(self).__name__.encode(), + b"\n ".join(output_lines)) def skip(self, new_id=None): """ @@ -461,6 +492,7 @@ class Blob(_GitElementWithId): self.original_id = original_id # Stores the blob's data + assert(type(data) == bytes) self.data = data def dump(self, file_): @@ -469,10 +501,10 @@ class Blob(_GitElementWithId): """ self.dumped = 1 - file_.write('blob\n') - file_.write('mark :%d\n' % self.id) - file_.write('data %d\n%s' % (len(self.data), self.data)) - file_.write('\n') + file_.write(b'blob\n') + file_.write(b'mark :%d\n' % self.id) + file_.write(b'data %d\n%s' % (len(self.data), self.data)) + file_.write(b'\n') class Reset(_GitElement): @@ -500,10 +532,10 @@ class Reset(_GitElement): """ self.dumped = 1 - file_.write('reset %s\n' % self.ref) + file_.write(b'reset %s\n' % self.ref) if self.from_ref: - file_.write('from :%d\n' % self.from_ref) - file_.write('\n') + file_.write(b'from :%d\n' % self.from_ref) + file_.write(b'\n') class FileChanges(_GitElement): """ @@ -514,7 +546,10 @@ class FileChanges(_GitElement): def __init__(self, type_, filename, id_ = None, mode = None): _GitElement.__init__(self) - # Denote the type of file-change (M for modify, D for delete, etc) + # Denote the type of file-change (b'M' for modify, b'D' for delete, etc) + # We could + # assert(type(type_) == bytes) + # here but I don't just due to worries about performance overhead... 
self.type = type_ # Record the name of the file being changed @@ -527,15 +562,15 @@ class FileChanges(_GitElement): # blob_id is the id (mark) of the affected blob self.blob_id = None - # For 'M' file changes (modify), expect to have id and mode - if type_ == 'M': + # For b'M' file changes (modify), expect to have id and mode + if type_ == b'M': if mode is None: raise SystemExit(_("file mode and idnum needed for %s") % filename) # pragma: no cover self.mode = mode self.blob_id = id_ - # For 'R' file changes (rename), expect to have newname as third arg - elif type_ == 'R': # pragma: no cover (now avoid fast-export renames) + # For b'R' file changes (rename), expect to have newname as third arg + elif type_ == b'R': # pragma: no cover (now avoid fast-export renames) if id_ is None: raise SystemExit(_("new name needed for rename of %s") % filename) self.filename = (self.filename, id_) @@ -544,17 +579,17 @@ class FileChanges(_GitElement): """ Write this file-change element to a file """ - skipped_blob = (self.type == 'M' and self.blob_id is None) + skipped_blob = (self.type == b'M' and self.blob_id is None) if skipped_blob: return self.dumped = 1 quoted_filename = PathQuoting.enquote(self.filename) - if self.type == 'M' and isinstance(self.blob_id, int): - file_.write('M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'M': - file_.write('M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'D': - file_.write('D %s\n' % quoted_filename) + if self.type == b'M' and isinstance(self.blob_id, int): + file_.write(b'M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'M': + file_.write(b'M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'D': + file_.write(b'D %s\n' % quoted_filename) else: raise SystemExit(_("Unhandled filechange type: %s") % self.type) # pragma: no cover @@ -618,16 +653,16 @@ class Commit(_GitElementWithId): # Make output to fast-import slightly easier for humans to read if the # message has no trailing newline of its own; cosmetic, but a nice touch... - extra_newline = '\n' - if self.message.endswith('\n') or not (self.parents or self.file_changes): - extra_newline = '' + extra_newline = b'\n' + if self.message.endswith(b'\n') or not (self.parents or self.file_changes): + extra_newline = b'' - file_.write(('commit {}\n' - 'mark :{}\n' - 'author {} <{}> {}\n' - 'committer {} <{}> {}\n' - 'data {}\n{}{}' - ).format( + file_.write((b'commit %s\n' + b'mark :%d\n' + b'author %s <%s> %s\n' + b'committer %s <%s> %s\n' + b'data %d\n%s%s' + ) % ( self.branch, self.id, self.author_name, self.author_email, self.author_date, self.committer_name, self.committer_email, self.committer_date, @@ -635,16 +670,18 @@ class Commit(_GitElementWithId): extra_newline) ) for i, parent in enumerate(self.parents): - mark = ':' if isinstance(parent, int) else '' - file_.write('from ' if i==0 else 'merge ') - file_.write('{}{}\n'.format(mark, parent)) + file_.write(b'from ' if i==0 else b'merge ') + if isinstance(parent, int): + file_.write(b':%d\n' % parent) + else: + file_.write(b'%s\n' % parent) for change in self.file_changes: change.dump(file_) if not self.parents and not self.file_changes: # Workaround a bug in pre-git-2.22 versions of fast-import with # the get-mark directive. 
-      file_.write('\n')
-    file_.write('\n')
+      file_.write(b'\n')
+    file_.write(b'\n')
 
   def first_parent(self):
     """
@@ -699,15 +736,15 @@ class Tag(_GitElement):
 
     self.dumped = 1
 
-    file_.write('tag %s\n' % self.ref)
-    mark = ':' if isinstance(self.from_ref, int) else ''
-    file_.write('from {}{}\n'.format(mark, self.from_ref))
+    file_.write(b'tag %s\n' % self.ref)
+    markfmt = b'from :%d\n' if isinstance(self.from_ref, int) else b'from %s\n'
+    file_.write(markfmt % self.from_ref)
     if self.tagger_name:
-      file_.write('tagger %s <%s> ' % (self.tagger_name, self.tagger_email))
+      file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email))
       file_.write(self.tagger_date)
-    file_.write('\n')
-    file_.write('data %d\n%s' % (len(self.message), self.message))
-    file_.write('\n')
+      file_.write(b'\n')
+    file_.write(b'data %d\n%s' % (len(self.message), self.message))
+    file_.write(b'\n')
 
 class Progress(_GitElement):
   """
@@ -731,8 +768,8 @@
     """
     self.dumped = 1
 
-    file_.write('progress %s\n' % self.message)
-    file_.write('\n')
+    file_.write(b'progress %s\n' % self.message)
+    file_.write(b'\n')
 
 class Checkpoint(_GitElement):
   """
@@ -754,8 +791,8 @@
    """
    self.dumped = 1
 
-    file_.write('checkpoint\n')
-    file_.write('\n')
+    file_.write(b'checkpoint\n')
+    file_.write(b'\n')
 
 class LiteralCommand(_GitElement):
   """
@@ -880,20 +917,20 @@
     self._files_tweaked = set()
 
     # Compile some regexes and cache those
-    self._mark_re = re.compile(r'mark :(\d+)\n$')
+    self._mark_re = re.compile(br'mark :(\d+)\n$')
     self._parent_regexes = {}
-    parent_regex_rules = ('{} :(\d+)\n$', '{} ([0-9a-f]{{40}})\n')
-    for parent_refname in ('from', 'merge'):
-      ans = [re.compile(x.format(parent_refname)) for x in parent_regex_rules]
+    parent_regex_rules = (br' :(\d+)\n$', br' ([0-9a-f]{40})\n')
+    for parent_refname in (b'from', b'merge'):
+      ans = [re.compile(parent_refname+x) for x in parent_regex_rules]
       self._parent_regexes[parent_refname] = ans
-    self._quoted_string_re = re.compile(r'"(?:[^"\\]|\\.)*"')
+    self._quoted_string_re = re.compile(br'"(?:[^"\\]|\\.)*"')
     self._refline_regexes = {}
-    for refline_name in ('reset', 'commit', 'tag', 'progress'):
-      self._refline_regexes[refline_name] = re.compile(refline_name+' (.*)\n$')
+    for refline_name in (b'reset', b'commit', b'tag', b'progress'):
+      self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\n$')
     self._user_regexes = {}
-    for user in ('author', 'committer', 'tagger'):
-      self._user_regexes[user] = re.compile(user + ' (.*?) <(.*?)> (.*)\n$')
-    self._hash_re = re.compile(r'(\b[0-9a-f]{7,40}\b)')
+    for user in (b'author', b'committer', b'tagger'):
+      self._user_regexes[user] = re.compile(user + b' (.*?) <(.*?)> (.*)\n$')
+    self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)')
 
   def _advance_currentline(self):
     """
@@ -941,51 +978,51 @@
     """
     If the current line contains a file-change object, then parse it
     and advance the current line; otherwise return None. We only care
-    about file changes of type 'M' and 'D' (these are the only types
+    about file changes of type b'M' and b'D' (these are the only types
     of file-changes that fast-export will provide).
""" filechange = None - changetype = self._currentline[0] - if changetype == 'M': + changetype = self._currentline[0:1] + if changetype == b'M': (changetype, mode, idnum, path) = self._currentline.split(None, 3) - if idnum[0] == ':': + if idnum[0:1] == b':': idnum = idnum[1:] - path = path.rstrip('\n') + path = path.rstrip(b'\n') # We translate the idnum to our id system if len(idnum) != 40: idnum = _IDS.translate( int(idnum) ) if idnum is not None: - if path.startswith('"'): + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('M', path, idnum, mode) + filechange = FileChanges(b'M', path, idnum, mode) else: - filechange = 'skipped' + filechange = b'skipped' self._advance_currentline() - elif changetype == 'D': + elif changetype == b'D': (changetype, path) = self._currentline.split(None, 1) - path = path.rstrip('\n') - if path.startswith('"'): + path = path.rstrip(b'\n') + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('D', path) + filechange = FileChanges(b'D', path) self._advance_currentline() - elif changetype == 'R': # pragma: no cover (now avoid fast-export renames) + elif changetype == b'R': # pragma: no cover (now avoid fast-export renames) rest = self._currentline[2:-1] - if rest.startswith('"'): + if rest.startswith(b'"'): m = self._quoted_string_re.match(rest) if not m: raise SystemExit(_("Couldn't parse rename source")) orig = PathQuoting.dequote(m.group(0)) new = rest[m.end()+1:] else: - orig, new = rest.split(' ', 1) - if new.startswith('"'): + orig, new = rest.split(b' ', 1) + if new.startswith(b'"'): new = PathQuoting.dequote(new) - filechange = FileChanges('R', orig, new) + filechange = FileChanges(b'R', orig, new) self._advance_currentline() return filechange def _parse_original_id(self): - original_id = self._currentline[len('original-oid '):].rstrip() + original_id = self._currentline[len(b'original-oid '):].rstrip() self._advance_currentline() return original_id @@ -1019,8 +1056,8 @@ class FastExportFilter(object): # fast-import will not choke on. Let's do that. Note that +051800 # seems to be the only weird timezone found in the wild, by me or some # other posts google returned on the subject... - if when.endswith('+051800'): - when = when[0:-7]+'+0261' + if when.endswith(b'+051800'): + when = when[0:-7]+b'+0261' self._advance_currentline() return (name, email, when) @@ -1031,11 +1068,11 @@ class FastExportFilter(object): the data. """ fields = self._currentline.split() - assert fields[0] == 'data' + assert fields[0] == b'data' size = int(fields[1]) data = self._input.read(size) self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() return data @@ -1052,11 +1089,11 @@ class FastExportFilter(object): id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); data = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the blob @@ -1087,9 +1124,9 @@ class FastExportFilter(object): the callback). 
""" # Parse the Reset - ref = self._parse_ref_line('reset') - ignoreme, from_ref = self._parse_optional_parent_ref('from') - if self._currentline == '\n': + ref = self._parse_ref_line(b'reset') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') + if self._currentline == b'\n': self._advance_currentline() # fast-export likes to print extraneous resets that serve no purpose. @@ -1219,10 +1256,10 @@ class FastExportFilter(object): # ancestor of another parent.) num_parents = len(parents) to_remove = [] - for cur in xrange(num_parents): + for cur in range(num_parents): if not is_rewritten[cur]: continue - for other in xrange(num_parents): + for other in range(num_parents): if cur == other: continue if not self._graph.is_ancestor(parents[cur], parents[other]): @@ -1312,19 +1349,19 @@ class FastExportFilter(object): for change in commit.file_changes: parent = new_1st_parent or commit.parents[0] # exists due to above checks quoted_filename = PathQuoting.enquote(change.filename) - self._output.write("ls :{} {}\n".format(parent, quoted_filename)) + self._output.write(b"ls :%d %s\n" % (parent, quoted_filename)) self._output.flush() parent_version = fi_output.readline().split() - if change.type == 'D': - if parent_version != ['missing', quoted_filename]: + if change.type == b'D': + if parent_version != [b'missing', quoted_filename]: return False else: blob_sha = change.blob_id if isinstance(change.blob_id, int): - self._output.write("get-mark :{}\n".format(change.blob_id)) + self._output.write(b"get-mark :%d\n" % change.blob_id) self._output.flush() blob_sha = fi_output.readline().rstrip() - if parent_version != [change.mode, 'blob', blob_sha, quoted_filename]: + if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]: return False return True @@ -1334,7 +1371,7 @@ class FastExportFilter(object): # Record the mapping of old commit hash to new one if commit.original_id and self._fast_import_pipes: fi_input, fi_output = self._fast_import_pipes - self._output.write("get-mark :{}\n".format(commit.id)) + self._output.write(b"get-mark :%d\n" % commit.id) self._output.flush() orig_id = commit.original_id self._commit_short_old_hashes[orig_id[0:7]].add(orig_id) @@ -1360,19 +1397,19 @@ class FastExportFilter(object): """ # Parse the Commit. This may look involved, but it's pretty simple; it only # looks bad because a commit object contains many pieces of data. - branch = self._parse_ref_line('commit') + branch = self._parse_ref_line(b'commit') id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); author_name = None - if self._currentline.startswith('author'): - (author_name, author_email, author_date) = self._parse_user('author') + if self._currentline.startswith(b'author'): + (author_name, author_email, author_date) = self._parse_user(b'author') (committer_name, committer_email, committer_date) = \ - self._parse_user('committer') + self._parse_user(b'committer') if not author_name: (author_name, author_email, author_date) = \ @@ -1381,12 +1418,12 @@ class FastExportFilter(object): commit_msg = self._parse_data() commit_msg = self._hash_re.sub(self._translate_commit_hash, commit_msg) - pinfo = [self._parse_optional_parent_ref('from')] + pinfo = [self._parse_optional_parent_ref(b'from')] # Due to empty pruning, we can have real 'from' and 'merge' lines that # due to commit rewriting map to a parent of None. 
We need to record # 'from' if its non-None, and we need to parse all 'merge' lines. - while self._currentline.startswith('merge '): - pinfo.append(self._parse_optional_parent_ref('merge')) + while self._currentline.startswith(b'merge '): + pinfo.append(self._parse_optional_parent_ref(b'merge')) orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)] # No parents is oddly represented as [None] instead of [], due to the @@ -1404,10 +1441,10 @@ class FastExportFilter(object): file_change = self._parse_optional_filechange() had_file_changes = file_change is not None while file_change: - if not (type(file_change) == str and file_change == 'skipped'): + if not (type(file_change) == bytes and file_change == b'skipped'): file_changes.append(file_change) file_change = self._parse_optional_filechange() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Okay, now we can finally create the Commit object @@ -1480,18 +1517,18 @@ class FastExportFilter(object): the callback). """ # Parse the Tag - tag = self._parse_ref_line('tag') - ignoreme, from_ref = self._parse_optional_parent_ref('from') + tag = self._parse_ref_line(b'tag') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); tagger_name, tagger_email, tagger_date = None, None, None - if self._currentline.startswith('tagger'): - (tagger_name, tagger_email, tagger_date) = self._parse_user('tagger') + if self._currentline.startswith(b'tagger'): + (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger') tag_msg = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the tag @@ -1514,7 +1551,7 @@ class FastExportFilter(object): tag.dump(self._output) # Record the fact that this tag was seen so we don't nuke it as part # of refs_to_nuke. - full_ref = 'refs/tags/{}'.format(tag.ref) + full_ref = b'refs/tags/' + tag.ref self._seen_refs[full_ref] = None def _parse_progress(self): @@ -1526,8 +1563,8 @@ class FastExportFilter(object): everything else is done (unless it has been skipped by the callback). 
""" # Parse the Progress - message = self._parse_ref_line('progress') - if self._currentline == '\n': + message = self._parse_ref_line(b'progress') + if self._currentline == b'\n': self._advance_currentline() # Create the progress message @@ -1555,7 +1592,7 @@ class FastExportFilter(object): """ # Parse the Checkpoint self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the checkpoint @@ -1587,7 +1624,7 @@ class FastExportFilter(object): def _handle_final_commands(self): self._finalize_handled = True - for ref, value in self._seen_refs.iteritems(): + for ref, value in self._seen_refs.items(): if value is not None: # Create a reset reset = Reset(ref, value) @@ -1602,17 +1639,18 @@ class FastExportFilter(object): reset.dump(self._output) def record_metadata(self, metadata_dir, orig_refs, refs_nuked): - deleted_hash = '0'*40 + deleted_hash = b'0'*40 self._flush_renames() - with open(os.path.join(metadata_dir, 'commit-map'), 'w') as f: - f.write("%-40s %s\n" % (_("old"), _("new"))) - for (old,new) in self._commit_renames.iteritems(): - f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) + with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f: + f.write(("%-40s %s\n" % (_("old"), _("new"))).encode()) + for (old,new) in self._commit_renames.items(): + msg = b'%s %s\n' % (old, new if new != None else deleted_hash) + f.write(msg) batch_check_process = None - batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') - with open(os.path.join(metadata_dir, 'ref-map'), 'w') as f: - for refname, old_hash in orig_refs.iteritems(): + batch_check_output_re = re.compile(b'^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') + with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f: + for refname, old_hash in orig_refs.items(): if refname in refs_nuked: new_hash = deleted_hash elif old_hash in self._commit_renames: @@ -1625,21 +1663,22 @@ class FastExportFilter(object): stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=self._repo_working_dir) - batch_check_process.stdin.write(refname+"\n") + batch_check_process.stdin.write(refname+b"\n") + batch_check_process.stdin.flush() line = batch_check_process.stdout.readline() m = batch_check_output_re.match(line) - if not m or m.group(2) != 'tag': + if not m or m.group(2) != b'tag': raise SystemExit(_("Failed to find new id for %(refname)s " "(old id was %(old_hash)s)") % ({'refname': refname, 'old_hash': old_hash}) ) # pragma: no cover new_hash = m.group(1) - f.write('{} {} {}\n'.format(old_hash, new_hash, refname)) + f.write(b'%s %s %s\n' % (old_hash, new_hash, refname)) if batch_check_process: batch_check_process.stdin.close() batch_check_process.wait() - with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'w') as f: + with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f: issues_found = False if self._commits_no_longer_merges: issues_found = True @@ -1649,10 +1688,10 @@ class FastExportFilter(object): are now regular commits; they likely have suboptimal commit messages (e.g. "Merge branch next into master"). 
Original commit hash on the left, commit hash after filtering/rewriting on the right: - ''')[1:])) + ''')[1:]).encode()) for oldhash, newhash in self._commits_no_longer_merges: - f.write(' {} {}\n'.format(oldhash, newhash)) - f.write('\n') + f.write(' {} {}\n'.format(oldhash, newhash).encode()) + f.write(b'\n') if self._commits_referenced_but_removed: issues_found = True @@ -1660,16 +1699,16 @@ class FastExportFilter(object): The following commits were filtered out, but referenced in another commit message. The reference to the now-nonexistent commit hash (or a substring thereof) was left as-is in any commit messages: - ''')[1:])) + ''')[1:]).encode()) for bad_commit_reference in self._commits_referenced_but_removed: - f.write(' {}\n'.format(bad_commit_reference)) - f.write('\n') + f.write(' {}\n'.format(bad_commit_reference).encode()) + f.write(b'\n') if not issues_found: - f.write(_("No filtering problems encountered.")) + f.write(_("No filtering problems encountered.\n").encode()) - with open(os.path.join(metadata_dir, 'already_ran'), 'w') as f: - f.write(_("This file exists to allow you to filter again without --force.")) + with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f: + f.write(_("This file exists to allow you to filter again without --force.\n").encode()) def get_seen_refs(self): return self._seen_refs.keys() @@ -1687,30 +1726,30 @@ class FastExportFilter(object): # Run over the input and do the filtering self._advance_currentline() while self._currentline: - if self._currentline.startswith('blob'): + if self._currentline.startswith(b'blob'): self._parse_blob() - elif self._currentline.startswith('reset'): + elif self._currentline.startswith(b'reset'): self._parse_reset() - elif self._currentline.startswith('commit'): + elif self._currentline.startswith(b'commit'): self._parse_commit() - elif self._currentline.startswith('tag'): + elif self._currentline.startswith(b'tag'): self._parse_tag() - elif self._currentline.startswith('progress'): + elif self._currentline.startswith(b'progress'): self._parse_progress() - elif self._currentline.startswith('checkpoint'): + elif self._currentline.startswith(b'checkpoint'): self._parse_checkpoint() - elif self._currentline.startswith('feature'): + elif self._currentline.startswith(b'feature'): self._parse_literal_command() - elif self._currentline.startswith('option'): + elif self._currentline.startswith(b'option'): self._parse_literal_command() - elif self._currentline.startswith('done'): + elif self._currentline.startswith(b'done'): self._handle_final_commands() self._parse_literal_command() - elif self._currentline.startswith('#'): + elif self._currentline.startswith(b'#'): self._parse_literal_command() - elif self._currentline.startswith('get-mark') or \ - self._currentline.startswith('cat-blob') or \ - self._currentline.startswith('ls'): + elif self._currentline.startswith(b'get-mark') or \ + self._currentline.startswith(b'cat-blob') or \ + self._currentline.startswith(b'ls'): raise SystemExit(_("Unsupported command: '%s'") % self._currentline) else: raise SystemExit(_("Could not parse line: '%s'") % self._currentline) @@ -1767,13 +1806,13 @@ class GitUtils(object): def is_repository_bare(repo_working_dir): out = subprocess.check_output('git rev-parse --is-bare-repository'.split(), cwd=repo_working_dir) - return (out.strip() == 'true') + return (out.strip() == b'true') @staticmethod def determine_git_dir(repo_working_dir): d = subprocess.check_output('git rev-parse --git-dir'.split(), cwd=repo_working_dir).strip() - if 
repo_working_dir=='.' or d.startswith('/'):
+    if repo_working_dir==b'.' or d.startswith(b'/'):
       return d
     return os.path.join(repo_working_dir, d)
 
@@ -1810,12 +1849,12 @@ class FilteringOptions(object):
     def __call__(self, parser, namespace, values, option_string=None):
       af = FilteringOptions.AppendFilter(dest='path_changes',
                                          option_strings=None)
-      dirname = values if values[-1] == '/' else values+'/'
+      dirname = values if values[-1:] == b'/' else values+b'/'
       if option_string == '--subdirectory-filter':
         af(parser, namespace, dirname, '--path-match')
-        af(parser, namespace, dirname+':', '--path-rename')
+        af(parser, namespace, dirname+b':', '--path-rename')
       elif option_string == '--to-subdirectory-filter':
-        af(parser, namespace, ':'+dirname, '--path-rename')
+        af(parser, namespace, b':'+dirname, '--path-rename')
       else:
         raise SystemExit(_("Error: HelperFilter given invalid option_string: %s")
                          % option_string) # pragma: no cover
 
@@ -1867,16 +1906,17 @@ class FilteringOptions(object):
              "files matching none of those options."))
 
     path.add_argument('--path-match', '--path', metavar='DIR_OR_FILE',
+        type=os.fsencode,
         action=FilteringOptions.AppendFilter, dest='path_changes',
         help=_("Exact paths (files or directories) to include in filtered "
               "history. Multiple --path options can be specified to get "
               "a union of paths."))
-    path.add_argument('--path-glob', metavar='GLOB',
+    path.add_argument('--path-glob', metavar='GLOB', type=os.fsencode,
         action=FilteringOptions.AppendFilter, dest='path_changes',
         help=_("Glob of paths to include in filtered history. Multiple "
               "--path-glob options can be specified to get a union of "
               "paths."))
-    path.add_argument('--path-regex', metavar='REGEX',
+    path.add_argument('--path-regex', metavar='REGEX', type=os.fsencode,
         action=FilteringOptions.AppendFilter, dest='path_changes',
         help=_("Regex of paths to include in filtered history. Multiple "
               "--path-regex options can be specified to get a union of "
@@ -1884,31 +1924,32 @@ class FilteringOptions(object):
 
     rename = parser.add_argument_group(title=_("Renaming based on paths"))
     rename.add_argument('--path-rename', '--path-rename-prefix',
-        metavar='OLD_NAME:NEW_NAME', dest='path_changes',
+        metavar='OLD_NAME:NEW_NAME', dest='path_changes', type=os.fsencode,
         action=FilteringOptions.AppendFilter,
         help=_("Prefix to rename; if filename starts with OLD_NAME, "
               "replace that with NEW_NAME. Multiple --path-rename "
               "options can be specified."))
 
     refrename = parser.add_argument_group(title=_("Renaming of refs"))
-    refrename.add_argument('--tag-rename', metavar='OLD:NEW',
+    refrename.add_argument('--tag-rename', metavar='OLD:NEW', type=os.fsencode,
         help=_("Rename tags starting with OLD to start with NEW. For "
               "example, --tag-rename foo:bar will rename tag foo-1.2.3 "
               "to bar-1.2.3; either OLD or NEW can be empty."))
 
     helpers = parser.add_argument_group(title=_("Shortcuts"))
     helpers.add_argument('--subdirectory-filter', metavar='DIRECTORY',
-        action=FilteringOptions.HelperFilter,
+        action=FilteringOptions.HelperFilter, type=os.fsencode,
         help=_("Only look at history that touches the given subdirectory "
               "and treat that directory as the project root. Equivalent "
              "to using '--path DIRECTORY/ --path-rename DIRECTORY/:'"))
     helpers.add_argument('--to-subdirectory-filter', metavar='DIRECTORY',
-        action=FilteringOptions.HelperFilter,
+        action=FilteringOptions.HelperFilter, type=os.fsencode,
         help=_("Treat the project root as instead being under DIRECTORY.
" "Equivalent to using '--path-rename :DIRECTORY/'")) people = parser.add_argument_group(title=_("Filtering of names/emails")) people.add_argument('--mailmap', dest='mailmap', metavar='FILENAME', + type=os.fsencode, help=_("Use specified mailmap file (see git-shortlog(1) for " "details on the format) when rewriting author, committer, " "and tagger names and emails. If the specified file is " @@ -1958,8 +1999,9 @@ class FilteringOptions(object): "CALLBACKS section below.")) location = parser.add_argument_group(title=_("Location to filter from/to")) - location.add_argument('--source', help=_("Git repository to read from")) - location.add_argument('--target', + location.add_argument('--source', type=os.fsencode, + help=_("Git repository to read from")) + location.add_argument('--target', type=os.fsencode, help=_("Git repository to overwrite with filtered history")) misc = parser.add_argument_group(title=_("Miscellaneous options")) @@ -2013,7 +2055,7 @@ class FilteringOptions(object): stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p.wait() output = p.stdout.read() - if '--combined-all-paths' not in output: + if b'--combined-all-paths' not in output: raise SystemExit(_("Error: need a version of git whose diff-tree command " "has the --combined-all-paths option")) # pragma: no cover @@ -2021,28 +2063,26 @@ class FilteringOptions(object): def get_replace_text(filename): replace_literals = [] replace_regexes = [] - with open(filename) as f: + with open(filename, 'br') as f: for line in f: - line = line.rstrip('\r\n') + line = line.rstrip(b'\r\n') # Determine the replacement - replacement = '***REMOVED***' - if '==>' in line: - line, replacement = line.rsplit('==>', 1) + replacement = b'***REMOVED***' + if b'==>' in line: + line, replacement = line.rsplit(b'==>', 1) # See if we need to match via regex regex = None - if line.startswith('regex:'): + if line.startswith(b'regex:'): regex = line[6:] - elif line.startswith('glob:'): - regex = fnmatch.translate(line[5:]) - if regex.endswith(r'\Z(?ms)'): - regex = regex[0:-7] + elif line.startswith(b'glob:'): + regex = glob_to_regex(line[5:]) if regex: replace_regexes.append((re.compile(regex), replacement)) else: # Otherwise, find the literal we need to replace - if line.startswith('literal:'): + if line.startswith(b'literal:'): line = line[8:] if not line: continue @@ -2104,7 +2144,7 @@ class RepoAnalyze(object): @staticmethod def handle_renames(stats, commit, change_types, filenames): for index, change_type in enumerate(change_types): - if change_type == 'R': + if change_type == ord(b'R'): oldname, newname = filenames[index], filenames[-1] RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname) RepoAnalyze.setup_or_update_rename_history(stats, commit, @@ -2117,7 +2157,7 @@ class RepoAnalyze(object): # Figure out kind of deletions to undo for this file, and update lists # of all-names-by-sha and all-filenames delmode = 'tree_deletions' - if mode != '040000': + if mode != b'040000': delmode = 'file_deletions' stats['names'][sha].add(filename) stats['allnames'].add(filename) @@ -2147,22 +2187,22 @@ class RepoAnalyze(object): graph.add_commit_and_parents(commit, parents) for change in file_changes: modes, shas, change_types, filenames = change - if len(parents) == 1 and change_types.startswith('R'): - change_types = 'R' # remove the rename score; we don't care - if modes[-1] == '160000': + if len(parents) == 1 and change_types.startswith(b'R'): + change_types = b'R' # remove the rename score; we don't care + if modes[-1] == b'160000': continue 
- elif modes[-1] == '000000': + elif modes[-1] == b'000000': # Track when files/directories are deleted for f in RepoAnalyze.equiv_class(stats, filenames[-1]): - if any(x == '040000' for x in modes[0:-1]): + if any(x == b'040000' for x in modes[0:-1]): stats['tree_deletions'][f] = date else: stats['file_deletions'][f] = date - elif change_types.strip('AMT') == '': + elif change_types.strip(b'AMT') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) - elif modes[-1] == '040000' and change_types.strip('RAM') == '': + elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) - elif change_types.strip('RAM') == '': + elif change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) RepoAnalyze.handle_renames(stats, commit, change_types, filenames) else: @@ -2187,7 +2227,7 @@ class RepoAnalyze(object): for line in cf.stdout: sha, objtype, objsize, objdisksize = line.split() objsize, objdisksize = int(objsize), int(objdisksize) - if objtype == 'blob': + if objtype == b'blob': unpacked_size[sha] = objsize packed_size[sha] = objdisksize num_blobs += 1 @@ -2212,25 +2252,23 @@ class RepoAnalyze(object): ' --date=short -M -t -c --raw --combined-all-paths') dtp = subprocess.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE) f = dtp.stdout - try: - line = f.next() - except StopIteration: + line = f.readline() + if not line: raise SystemExit(_("Nothing to analyze; repository is empty.")) cont = bool(line) graph = AncestryGraph() while cont: commit = line.rstrip() - parents = f.next().split() - date = f.next().rstrip() + parents = f.readline().split() + date = f.readline().rstrip() # We expect a blank line next; if we get a non-blank line then # this commit modified no files and we need to move on to the next. # If there is no line, we've reached end-of-input. 
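
Aside, not part of the patch: the `f.next()` removals just below happen because Python 3 binary pipes are file objects with no `.next()` method; `readline()` returning the empty bytestring b'' replaces StopIteration as the end-of-input signal. A small sketch, assuming it runs inside a git repository:

import subprocess

p = subprocess.Popen('git log --format=%H'.split(), stdout=subprocess.PIPE)
line = p.stdout.readline()
if not line:                     # b'' means end-of-input
  raise SystemExit('Nothing to analyze; repository is empty.')
while line:
  commit = line.rstrip()         # bytes, e.g. b'a89c82a2...'
  line = p.stdout.readline()
p.wait()
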
- try: - line = f.next().rstrip() - cont = True - except StopIteration: + line = f.readline() + if not line: cont = False + line = line.rstrip() # If we haven't reached end of input, and we got a blank line meaning # a commit that has modified files, then get the file changes associated @@ -2239,17 +2277,17 @@ class RepoAnalyze(object): if cont and not line: cont = False for line in f: - if not line.startswith(':'): + if not line.startswith(b':'): cont = True break n = 1+max(1, len(parents)) - assert line.startswith(':'*(n-1)) + assert line.startswith(b':'*(n-1)) relevant = line[n-1:-1] splits = relevant.split(None, n) modes = splits[0:n] splits = splits[n].split(None, n) shas = splits[0:n] - splits = splits[n].split('\t') + splits = splits[n].split(b'\t') change_types = splits[0] filenames = [PathQuoting.dequote(x) for x in splits[1:]] file_changes.append([modes, shas, change_types, filenames]) @@ -2274,13 +2312,13 @@ class RepoAnalyze(object): @staticmethod def write_report(reportdir, stats): def datestr(datetimestr): - return datetimestr if datetimestr else _('') + return datetimestr if datetimestr else _('').encode() def dirnames(path): while True: path = os.path.dirname(path) yield path - if path == '': + if path == b'': break # Compute aggregate size information for paths, extensions, and dirs @@ -2322,27 +2360,27 @@ class RepoAnalyze(object): for name in dir_size['packed']: dir_deleted_data[name] = stats['tree_deletions'].get(name, None) - with open(os.path.join(reportdir, "README"), 'w') as f: + with open(os.path.join(reportdir, b"README"), 'bw') as f: # Give a basic overview of this file - f.write("== %s ==\n" % _("Overall Statistics")) - f.write(" %s: %d\n" % (_("Number of commits"), - stats['num_commits'])) - f.write(" %s: %d\n" % (_("Number of filenames"), - len(path_size['packed']))) - f.write(" %s: %d\n" % (_("Number of directories"), - len(dir_size['packed']))) - f.write(" %s: %d\n" % (_("Number of file extensions"), - len(ext_size['packed']))) - f.write("\n") - f.write(" %s: %d\n" % (_("Total unpacked size (bytes)"), - total_size['unpacked'])) - f.write(" %s: %d\n" % (_("Total packed size (bytes)"), - total_size['packed'])) - f.write("\n") + f.write(b"== %s ==\n" % _("Overall Statistics").encode()) + f.write((" %s: %d\n" % (_("Number of commits"), + stats['num_commits'])).encode()) + f.write((" %s: %d\n" % (_("Number of filenames"), + len(path_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of directories"), + len(dir_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of file extensions"), + len(ext_size['packed']))).encode()) + f.write(b"\n") + f.write((" %s: %d\n" % (_("Total unpacked size (bytes)"), + total_size['unpacked'])).encode()) + f.write((" %s: %d\n" % (_("Total packed size (bytes)"), + total_size['packed'])).encode()) + f.write(b"\n") # Mention issues with the report - f.write("== %s ==\n" % _("Caveats")) - f.write("=== %s ===\n" % _("Sizes")) + f.write(("== %s ==\n" % _("Caveats")).encode()) + f.write(("=== %s ===\n" % _("Sizes")).encode()) f.write(textwrap.dedent(_(""" Packed size represents what size your repository would be if no trees, commits, tags, or other metadata were included (though it may @@ -2370,9 +2408,9 @@ class RepoAnalyze(object): ever reverted to a previous version's contents, the previous version's size will be counted multiple times in this analysis, even though git will only store it once. 
- """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Deletions")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Deletions")).encode()) f.write(textwrap.dedent(_(""" Whether a file is deleted is not a binary quality, since it can be deleted on some branches but still exist in others. Also, it might @@ -2388,9 +2426,9 @@ class RepoAnalyze(object): stream that mentions the file lists it as deleted. This makes it dependent on topological ordering, but generally gives the "right" answer. - """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Renames")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Renames")).encode()) f.write(textwrap.dedent(_(""" Renames share the same non-binary nature that deletions do, plus additional challenges: @@ -2406,102 +2444,106 @@ class RepoAnalyze(object): * The ability for users to rename files differently in different branches means that our chains of renames will not necessarily be linear but may branch out. - """)[1:])) - f.write("\n") + """)[1:]).encode()) + f.write(b"\n") # Equivalence classes for names, so if folks only want to keep a # certain set of paths, they know the old names they want to include # too. - with open(os.path.join(reportdir, "renames.txt"), 'w') as f: + with open(os.path.join(reportdir, b"renames.txt"), 'bw') as f: seen = set() - for pathname,equiv_group in sorted(stats['equivalence'].iteritems(), + for pathname,equiv_group in sorted(stats['equivalence'].items(), key=lambda x:(x[1], x[0])): if equiv_group in seen: continue seen.add(equiv_group) - f.write("{} ->\n ".format(equiv_group[0]) + - "\n ".join(equiv_group[1:]) + - "\n") + f.write(("{} ->\n ".format(decode(equiv_group[0])) + + "\n ".join(decode(x) for x in equiv_group[1:]) + + "\n").encode()) # List directories in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("Deleted directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) - for dirname, size in sorted(dir_size['packed'].iteritems(), + with open(os.path.join(reportdir, b"directories-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted directories by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) + for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (dir_deleted_data[dirname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _('').encode())) - with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("All directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) - for dirname, size in sorted(dir_size['packed'].iteritems(), + with open(os.path.join(reportdir, b"directories-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All directories by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) + for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - 
.format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(""))) + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _("").encode())) # List extensions in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("Deleted extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) - for extname, size in sorted(ext_size['packed'].iteritems(), + with open(os.path.join(reportdir, b"extensions-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted extensions by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) + for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (ext_deleted_data[extname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) - with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("All extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) - for extname, size in sorted(ext_size['packed'].iteritems(), + with open(os.path.join(reportdir, b"extensions-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All extensions by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) + for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) # List files in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n")) - for pathname, size in sorted(path_size['packed'].iteritems(), + with open(os.path.join(reportdir, b"path-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, path name(s)\n") + f.write(msg.encode()) + for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) if when: - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) - with open(os.path.join(reportdir, "path-all-sizes.txt"), 'w') as f: - f.write("=== %s ===\n" % _("All paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n")) - for pathname, size in sorted(path_size['packed'].iteritems(), + with 
open(os.path.join(reportdir, b"path-all-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("All paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, pathectory name\n") + f.write(msg.encode()) + for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) # List of filenames and sizes in descending order - with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'w') as f: - f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")) - f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n")) - for sha, size in sorted(stats['packed_size'].iteritems(), + with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode()) + f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode()) + for sha, size in sorted(stats['packed_size'].items(), key=lambda x:(x[1],x[0]), reverse=True): if sha not in stats['names']: # Some objects in the repository might not be referenced, or not @@ -2511,21 +2553,21 @@ class RepoAnalyze(object): if len(names_with_sha) == 1: names_with_sha = names_with_sha.pop() else: - names_with_sha = sorted(list(names_with_sha)) - f.write(" {} {:10d} {:10d} {}\n".format(sha, - stats['unpacked_size'][sha], - size, - names_with_sha)) + names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']' + f.write(b" %s %10d %10d %s\n" % (sha, + stats['unpacked_size'][sha], + size, + names_with_sha)) @staticmethod def run(args): - git_dir = GitUtils.determine_git_dir('.') + git_dir = GitUtils.determine_git_dir(b'.') # Create the report directory as necessary - results_tmp_dir = os.path.join(git_dir, 'filter-repo') + results_tmp_dir = os.path.join(git_dir, b'filter-repo') if not os.path.isdir(results_tmp_dir): os.mkdir(results_tmp_dir) - reportdir = os.path.join(results_tmp_dir, "analysis") + reportdir = os.path.join(results_tmp_dir, b"analysis") if not args.force and os.path.isdir(reportdir): shutil.rmtree(reportdir) os.mkdir(reportdir) @@ -2534,7 +2576,7 @@ class RepoAnalyze(object): stats = RepoAnalyze.gather_data(args) # Write the reports - sys.stdout.write(_("Writing reports to %s...") % reportdir) + sys.stdout.write(_("Writing reports to %s...") % decode(reportdir)) sys.stdout.flush() RepoAnalyze.write_report(reportdir, stats) sys.stdout.write(_("done.\n")) @@ -2621,8 +2663,8 @@ class RepoFilter(object): def _handle_arg_callbacks(self): def make_callback(argname, str): - exec 'def callback({}):\n'.format(argname)+\ - ' '+'\n '.join(str.splitlines()) in globals() + exec('def callback({}):\n'.format(argname)+ + ' '+'\n '.join(str.splitlines()), globals()) return callback #namespace['callback'] def handle(type): callback_field = '_{}_callback'.format(type) @@ -2663,7 +2705,7 @@ class RepoFilter(object): # Do sanity checks from the correct directory tmp_dir = self.results_tmp_dir(create_if_missing=False) if not self._args.force and \ - not os.path.isfile(os.path.join(tmp_dir, 'already_ran')): + not os.path.isfile(os.path.join(tmp_dir, b'already_ran')): cwd = os.getcwd() os.chdir(target_working_dir) 
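
Aside, not part of the patch: the `exec(...)` change above is the py2 statement vs. py3 function split for building callbacks out of command-line snippets. A sketch, where 'return blob' stands in for a hypothetical callback body a user might pass:

body = 'return blob'
source = 'def callback(blob):\n  ' + '\n  '.join(body.splitlines())
exec(source, globals())          # py2 spelling was: exec source in globals()
print(callback('some data'))     # -> 'some data'
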
RepoFilter.sanity_check(self._orig_refs, is_bare) @@ -2680,38 +2722,38 @@ class RepoFilter(object): # Make sure repo is fully packed, just like a fresh clone would be output = subprocess.check_output('git count-objects -v'.split()) - stats = dict(x.split(': ') for x in output.splitlines()) - num_packs = int(stats['packs']) - if stats['count'] != '0' or num_packs > 1: + stats = dict(x.split(b': ') for x in output.splitlines()) + num_packs = int(stats[b'packs']) + if stats[b'count'] != b'0' or num_packs > 1: abort(_("expected freshly packed repo")) # Make sure there is precisely one remote, named "origin"...or that this # is a new bare repo with no packs and no remotes output = subprocess.check_output('git remote'.split()).strip() - if not (output == "origin" or (num_packs == 0 and not output)): + if not (output == b"origin" or (num_packs == 0 and not output)): abort(_("expected one remote, origin")) # Avoid letting people running with weird setups and overwriting GIT_DIR # elsewhere - git_dir = GitUtils.determine_git_dir('.') - if is_bare and git_dir != '.': + git_dir = GitUtils.determine_git_dir(b'.') + if is_bare and git_dir != b'.': abort(_("GIT_DIR must be .")) - elif not is_bare and git_dir != '.git': + elif not is_bare and git_dir != b'.git': abort(_("GIT_DIR must be .git")) # Make sure that all reflogs have precisely one entry - reflog_dir=os.path.join(git_dir, 'logs') + reflog_dir=os.path.join(git_dir, b'logs') for root, dirs, files in os.walk(reflog_dir): for filename in files: pathname = os.path.join(root, filename) - with open(pathname) as f: + with open(pathname, 'br') as f: if len(f.read().splitlines()) > 1: shortpath = pathname[len(reflog_dir)+1:] abort(_("expected at most one entry in the reflog for %s") % - shortpath) + decode(shortpath)) # Make sure there are no stashed changes - if 'refs/stash' in refs: + if b'refs/stash' in refs: abort(_("has stashed changes")) # Do extra checks in non-bare repos @@ -2725,14 +2767,16 @@ class RepoFilter(object): abort(_("you have untracked changes")) # Avoid unpushed changes - for refname, rev in refs.iteritems(): - if not refname.startswith('refs/heads/'): + for refname, rev in refs.items(): + if not refname.startswith(b'refs/heads/'): continue - origin_ref = refname.replace('refs/heads/', 'refs/remotes/origin/') + origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/') if origin_ref not in refs: - abort(_('%s exists, but %s not found') % (refname, origin_ref)) + abort(_('%s exists, but %s not found') % (decode(refname), + decode(origin_ref))) if rev != refs[origin_ref]: - abort(_('%s does not match %s') % (refname, origin_ref)) + abort(_('%s does not match %s') % (decode(refname), + decode(origin_ref))) @staticmethod def tweak_blob(args, blob): @@ -2744,13 +2788,13 @@ class RepoFilter(object): def tweak_commit(self, commit): def filename_matches(path_expression, pathname): - if path_expression == '': + if path_expression == b'': return True n = len(path_expression) if (pathname.startswith(path_expression) and - (path_expression[n-1] == '/' or + (path_expression[n-1:n] == b'/' or len(pathname) == n or - pathname[n] == '/')): + pathname[n:n+1] == b'/')): return True return False @@ -2766,7 +2810,7 @@ class RepoFilter(object): if match_type == 'regex' and path_exp.search(pathname): wanted = True elif mod_type == 'rename': - old_exp, new_exp = path_exp.split(':') + old_exp, new_exp = path_exp.split(b':') assert match_type in ('prefix',) if match_type == 'prefix' and pathname.startswith(old_exp): pathname = 
pathname.replace(old_exp, new_exp, 1) @@ -2834,15 +2878,15 @@ class RepoFilter(object): # in sync with the original with any changes, and then decides # they want to rewrite history to only have one of the two files) colliding_change = new_file_changes[change.filename] - if change.type == 'D': + if change.type == b'D': # We can just throw this one away and keep the other continue - elif change.type == 'M' and ( + elif change.type == b'M' and ( change.mode == colliding_change.mode and change.blob_id == colliding_change.blob_id): # The two are identical, so we can throw this one away and keep other continue - elif new_file_changes[change.filename].type != 'D': + elif new_file_changes[change.filename].type != b'D': raise SystemExit(_("File renaming caused colliding pathnames!\n") + _(" Commit: {}\n").format(commit.original_id) + _(" Filename: {}").format(change.filename)) @@ -2851,8 +2895,8 @@ class RepoFilter(object): @staticmethod def do_tag_rename(rename_pair, tagname): - old, new = rename_pair.split(':', 1) - old, new = 'refs/tags/'+old, 'refs/tags/'+new + old, new = rename_pair.split(b':', 1) + old, new = b'refs/tags/'+old, b'refs/tags/'+new if tagname.startswith(old): return tagname.replace(old, new, 1) return tagname @@ -2863,7 +2907,7 @@ class RepoFilter(object): tag.message = self._message_callback(tag.message) # Tweak the tag name according to callbacks - tag_prefix = 'refs/tags/' + tag_prefix = b'refs/tags/' fullref = tag_prefix+tag.ref if self._args.tag_rename: fullref = RepoFilter.do_tag_rename(self._args.tag_rename, fullref) @@ -2891,9 +2935,9 @@ class RepoFilter(object): reset.ref = self._refname_callback(reset.ref) def results_tmp_dir(self, create_if_missing=True): - working_dir = self._args.target or self._args.source or '.' + working_dir = self._args.target or self._args.source or b'.' 
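
Aside, not part of the patch: do_tag_rename above now operates purely on bytestrings, splitting the OLD:NEW pair with a bytes separator. A sketch of its behavior, reusing the patch's own logic:

def do_tag_rename(rename_pair, tagname):
  old, new = rename_pair.split(b':', 1)
  old, new = b'refs/tags/'+old, b'refs/tags/'+new
  if tagname.startswith(old):
    return tagname.replace(old, new, 1)
  return tagname

assert do_tag_rename(b'foo:bar', b'refs/tags/foo-1.2.3') == b'refs/tags/bar-1.2.3'
assert do_tag_rename(b'foo:bar', b'refs/tags/other') == b'refs/tags/other'
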
@@ -2891,9 +2935,9 @@ class RepoFilter(object):
       reset.ref = self._refname_callback(reset.ref)
 
   def results_tmp_dir(self, create_if_missing=True):
-    working_dir = self._args.target or self._args.source or '.'
+    working_dir = self._args.target or self._args.source or b'.'
     git_dir = GitUtils.determine_git_dir(working_dir)
-    d = os.path.join(git_dir, 'filter-repo')
+    d = os.path.join(git_dir, b'filter-repo')
     if create_if_missing and not os.path.isdir(d):
       os.mkdir(d)
     return d
@@ -2919,7 +2963,8 @@ class RepoFilter(object):
 
   def _setup_input(self, use_done_feature):
     if self._args.stdin:
-      self._input = sys.stdin
+      self._input = sys.stdin.detach()
+      sys.stdin = None  # Make sure no one tries to accidentally use it
       self._fe_orig = None
     else:
       skip_blobs = (self._blob_callback is None and
@@ -2937,12 +2982,13 @@
       self._input = self._fep.stdout
       if self._args.dry_run or self._args.debug:
         self._fe_orig = os.path.join(self.results_tmp_dir(),
-                                     'fast-export.original')
-        output = open(self._fe_orig, 'w')
+                                     b'fast-export.original')
+        output = open(self._fe_orig, 'bw')
         self._input = InputFileBackup(self._input, output)
         if self._args.debug:
           print("[DEBUG] Running: {}".format(' '.join(fep_cmd)))
-          print("  (saving a copy of the output at {})".format(self._fe_orig))
+          print("  (saving a copy of the output at {})"
+                .format(decode(self._fe_orig)))
 
   def _setup_output(self):
     if not self._args.dry_run:
@@ -2955,20 +3001,21 @@
       self._import_pipes = (self._fip.stdin, self._fip.stdout)
     if self._args.dry_run or self._args.debug:
       self._fe_filt = os.path.join(self.results_tmp_dir(),
-                                   'fast-export.filtered')
-      self._output = open(self._fe_filt, 'w')
+                                   b'fast-export.filtered')
+      self._output = open(self._fe_filt, 'bw')
     else:
       self._output = self._fip.stdin
     if self._args.debug:
       self._output = DualFileWriter(self._fip.stdin, self._output)
       print("[DEBUG] Running: {}".format(' '.join(fip_cmd)))
-      print("  (using the following file as input: {})".format(self._fe_filt))
+      print("  (using the following file as input: {})"
+            .format(decode(self._fe_filt)))
 
   def _migrate_origin_to_heads(self):
     if self._args.dry_run:
       return
     refs_to_migrate = set(x for x in self._orig_refs
-                          if x.startswith('refs/remotes/origin/'))
+                          if x.startswith(b'refs/remotes/origin/'))
     if not refs_to_migrate:
       return
     if self._args.debug:
@@ -2978,14 +3025,14 @@
                          stdin=subprocess.PIPE,
                          cwd=target_working_dir)
     for ref in refs_to_migrate:
-      if ref == 'refs/remotes/origin/HEAD':
-        p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref]))
+      if ref == b'refs/remotes/origin/HEAD':
+        p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref]))
         del self._orig_refs[ref]
         continue
-      newref = ref.replace('refs/remotes/origin/', 'refs/heads/')
+      newref = ref.replace(b'refs/remotes/origin/', b'refs/heads/')
       if newref not in self._orig_refs:
-        p.stdin.write('create {} {}\n'.format(newref, self._orig_refs[ref]))
-      p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref]))
+        p.stdin.write(b'create %s %s\n' % (newref, self._orig_refs[ref]))
+      p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref]))
       self._orig_refs[newref] = self._orig_refs[ref]
       del self._orig_refs[ref]
     p.stdin.close()
@@ -3067,10 +3114,10 @@ class RepoFilter(object):
       print(_("NOTE: Not running fast-import or cleaning up; --dry-run passed."))
       if self._fe_orig:
         print(_("      Requested filtering can be seen by comparing:"))
-        print("        " + self._fe_orig)
+        print("        " + decode(self._fe_orig))
       else:
         print(_("      Requested filtering can be seen at:"))
-        print("        " + decode(self._fe_filt))
       return
 
     target_working_dir = self._args.target or '.'
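_setup_input above leans on the fact that sys.stdin in Python 3 is a text-mode io.TextIOWrapper: detach() returns the underlying binary buffer, so a piped fast-export stream is read as raw bytes, and nulling out sys.stdin makes any stray text-mode read fail immediately instead of silently decoding. A small sketch of the pattern (assumes stdin is an ordinary stream):

    import io
    import sys

    assert isinstance(sys.stdin, io.TextIOWrapper)  # decodes bytes to str
    binary_in = sys.stdin.detach()   # the buffered binary stream underneath
    sys.stdin = None                 # later accidental uses now raise
    line = binary_in.readline()      # yields bytes such as b'blob\n'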
@@ -3080,11 +3127,11 @@ class RepoFilter(object):
     if refs_to_nuke:
       if self._args.debug:
         print("[DEBUG] Deleting the following refs:\n  "+
-              "\n  ".join(refs_to_nuke))
+              decode(b"\n  ".join(refs_to_nuke)))
       p = subprocess.Popen('git update-ref --stdin'.split(),
                            stdin=subprocess.PIPE,
                            cwd=target_working_dir)
-      p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x)
+      p.stdin.write(b''.join([b"option no-deref\ndelete %s\n" % x
                              for x in refs_to_nuke]))
       p.stdin.close()
       if p.wait():
diff --git a/t/run_coverage b/t/run_coverage
index 0e2fe74..3abd9af 100755
--- a/t/run_coverage
+++ b/t/run_coverage
@@ -21,8 +21,8 @@ export PYTHONPATH=$tmpdir:
 ls t939*.sh | xargs -n 1 bash
 
 cd $tmpdir
-python-coverage combine
-python-coverage html -d $orig_dir/report
-python-coverage report -m
+python3-coverage combine
+python3-coverage html -d $orig_dir/report
+python3-coverage report -m
 
 cd $orig_dir
 rm -rf $tmpdir
diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh
index 52221d1..8a674ab 100755
--- a/t/t9390-filter-repo.sh
+++ b/t/t9390-filter-repo.sh
@@ -450,15 +450,15 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' '
 	head -n 9 README >actual &&
 	test_cmp expect actual &&
 
-	cat | tr Q "\047" >expect <<-\EOF &&
+	cat >expect <<-\EOF &&
 	=== Files by sha and associated pathnames in reverse size ===
 	Format: sha, unpacked size, packed size, filename(s) object stored as
 	  a89c82a2d4b713a125a4323d25adda062cc0013d         44         48 numbers/medium.num
 	  f00c965d8307308469e537302baa73048488f162         21         37 numbers/small.num
 	  2aa69a2a708eed00cb390e30f6bcc3eed773f390         20         36 whatever
-	  51b95456de9274c9a95f756742808dfd480b9b35         13         29 [QcapriciousQ, QfickleQ, QmercurialQ]
-	  732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0          5         20 [Qsequence/knowQ, Qwords/knowQ]
-	  34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373          5         20 [Qsequence/toQ, Qwords/toQ]
+	  51b95456de9274c9a95f756742808dfd480b9b35         13         29 [capricious, fickle, mercurial]
+	  732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0          5         20 [sequence/know, words/know]
+	  34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373          5         20 [sequence/to, words/to]
 	  7ecb56eb3fa3fa6f19dd48bca9f971950b119ede          3         18 words/know
 	EOF
 	test_cmp expect blob-shas-and-paths.txt &&
@@ -795,7 +795,7 @@ test_expect_success 'incremental import' '
 		original=$(git rev-parse master) &&
 		git fast-export --reference-excluded-parents master~2..master \
-			| git filter-repo --stdin --refname-callback "return \"develop\"" &&
+			| git filter-repo --stdin --refname-callback "return b\"develop\"" &&
 		test "$(git rev-parse develop)" = "$original"
 	)
 '
diff --git a/t/t9391-filter-repo-lib-usage.sh b/t/t9391-filter-repo-lib-usage.sh
index e923d29..a967f31 100755
--- a/t/t9391-filter-repo-lib-usage.sh
+++ b/t/t9391-filter-repo-lib-usage.sh
@@ -158,7 +158,7 @@ test_expect_success 'other error cases' '
 		mkdir other &&
 		cd other &&
-		! python -c "import git_filter_repo as fr; fr.GitUtils.get_commit_count(\".\", [\"HEAD\"])" 2>err &&
+		! python3 -c "import git_filter_repo as fr; fr.GitUtils.get_commit_count(\".\", [\"HEAD\"])" 2>err &&
 		test_i18ngrep ". does not appear to be a valid git repository" err
 	)
 '
diff --git a/t/t9391/commit_info.py b/t/t9391/commit_info.py
index e697bd8..01fd725 100755
--- a/t/t9391/commit_info.py
+++ b/t/t9391/commit_info.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -13,12 +13,12 @@ import git_filter_repo as fr
 
 def change_up_them_commits(commit):
   # Change the commit author
-  if commit.author_name == "Copy N. Paste":
-    commit.author_name = "Ima L. Oser"
-    commit.author_email = "aloser@my.corp"
+  if commit.author_name == b"Copy N. Paste":
+    commit.author_name = b"Ima L. Oser"
+    commit.author_email = b"aloser@my.corp"
 
   # Fix the author email
-  commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email)
+  commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email)
 
   # Fix the committer date (bad timezone conversion in initial import)
   oldtime = fr.string_to_date(commit.committer_date)
@@ -26,7 +26,7 @@ def change_up_them_commits(commit):
   commit.committer_date = fr.date_to_string(newtime)
 
   # Fix the commit message
-  commit.message = re.sub("Marketing is staffed with pansies", "",
+  commit.message = re.sub(b"Marketing is staffed with pansies", b"",
                           commit.message)
 
 args = fr.FilteringOptions.parse_args(['--force'])
diff --git a/t/t9391/create_fast_export_output.py b/t/t9391/create_fast_export_output.py
index a1b21e0..1eb0a3d 100755
--- a/t/t9391/create_fast_export_output.py
+++ b/t/t9391/create_fast_export_output.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -23,82 +23,82 @@ out.importer_only()
 
 output = out._output
 
-world = Blob("Hello")
+world = Blob(b"Hello")
 world.dump(output)
-bar = Blob("foo\n")
+bar = Blob(b"foo\n")
 bar.dump(output)
 
-master = Reset("refs/heads/master")
+master = Reset(b"refs/heads/master")
 master.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('M', 'bar', bar.id, mode="100644")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'M', b'bar', bar.id, mode=b"100644")]
 when = datetime(year=2005, month=4, day=7,
                 hour=15, minute=16, second=10,
-                tzinfo=FixedTimeZone("-0700"))
+                tzinfo=FixedTimeZone(b"-0700"))
 when_string = fr.date_to_string(when)
-commit1 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "My first commit!  Wooot!\n\nLonger description",
+commit1 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"My first commit!  Wooot!\n\nLonger description",
                  changes, parents = [])
 commit1.dump(output)
 
-world = Blob("Hello\nHi")
+world = Blob(b"Hello\nHi")
 world.dump(output)
-world_link = Blob("world")
+world_link = Blob(b"world")
 world_link.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('M', 'planet', world_link.id, mode="120000")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
 when += timedelta(days=3, hours=4, minutes=6)
 when_string = fr.date_to_string(when)
-commit2 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Make a symlink to world called planet, modify world",
+commit2 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Make a symlink to world called planet, modify world",
                  changes, parents = [commit1.id])
 commit2.dump(output)
 
-script = Blob("#!/bin/sh\n\necho Hello")
+script = Blob(b"#!/bin/sh\n\necho Hello")
 script.dump(output)
-changes = [FileChanges('M', 'runme', script.id, mode="100755"),
-           FileChanges('D', 'bar')]
-when_string = "1234567890 -0700"
-commit3 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Add runme script, remove bar",
+changes = [FileChanges(b'M', b'runme', script.id, mode=b"100755"),
+           FileChanges(b'D', b'bar')]
+when_string = b"1234567890 -0700"
+commit3 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Add runme script, remove bar",
                  changes, parents = [commit2.id])
 commit3.dump(output)
 
-progress = Progress("Done with the master branch now...")
+progress = Progress(b"Done with the master branch now...")
 progress.dump(output)
 checkpoint = Checkpoint()
 checkpoint.dump(output)
 
-devel = Reset("refs/heads/devel", commit1.id)
+devel = Reset(b"refs/heads/devel", commit1.id)
 devel.dump(output)
 
-world = Blob("Hello\nGoodbye")
+world = Blob(b"Hello\nGoodbye")
 world.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644")]
-when = datetime(2006, 8, 17, tzinfo=FixedTimeZone("+0200"))
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644")]
+when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200"))
 when_string = fr.date_to_string(when)
-commit4 = Commit("refs/heads/devel",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Modify world",
+commit4 = Commit(b"refs/heads/devel",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Modify world",
                  changes, parents = [commit1.id])
 commit4.dump(output)
 
-world = Blob("Hello\nHi\nGoodbye")
+world = Blob(b"Hello\nHi\nGoodbye")
 world.dump(output)
 when = fr.string_to_date(commit3.author_date) + timedelta(days=47)
 when_string = fr.date_to_string(when)
@@ -106,22 +106,22 @@ when_string = fr.date_to_string(when)
 # to the first parent.  Thus, despite the fact that runme and planet have
 # not changed and bar was not modified in the devel side, we have to list them
 # all anyway.
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('D', 'bar'),
-           FileChanges('M', 'runme', script.id, mode="100755"),
-           FileChanges('M', 'planet', world_link.id, mode="120000")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'D', b'bar'),
+           FileChanges(b'M', b'runme', script.id, mode=b"100755"),
+           FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
 
-commit5 = Commit("refs/heads/devel",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Merge branch 'master'\n",
+commit5 = Commit(b"refs/heads/devel",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Merge branch 'master'\n",
                  changes, parents = [commit4.id, commit3.id])
 commit5.dump(output)
 
-mytag = Tag("refs/tags/v1.0", commit5.id,
-            "His R. Highness", "royalty@my.kingdom", when_string,
-            "I bequeath to my peons this royal software")
+mytag = Tag(b"refs/tags/v1.0", commit5.id,
+            b"His R. Highness", b"royalty@my.kingdom", when_string,
+            b"I bequeath to my peons this royal software")
 mytag.dump(output)
 
 out.finish()
diff --git a/t/t9391/erroneous.py b/t/t9391/erroneous.py
index a5c05d2..db6051b 100755
--- a/t/t9391/erroneous.py
+++ b/t/t9391/erroneous.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
diff --git a/t/t9391/file_filter.py b/t/t9391/file_filter.py
index f6a1ae9..c3683fc 100755
--- a/t/t9391/file_filter.py
+++ b/t/t9391/file_filter.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -15,14 +15,14 @@ import sys
 import git_filter_repo as fr
 
 def drop_file_by_contents(blob):
-  bad_file_contents = 'The launch code is 1-2-3-4.'
+  bad_file_contents = b'The launch code is 1-2-3-4.'
   if blob.data == bad_file_contents:
     blob.skip()
 
 def drop_files_by_name(commit):
   new_file_changes = []
   for change in commit.file_changes:
-    if not change.filename.endswith('.doc'):
+    if not change.filename.endswith(b'.doc'):
       new_file_changes.append(change)
   commit.file_changes = new_file_changes
 
diff --git a/t/t9391/print_progress.py b/t/t9391/print_progress.py
index 5256b74..bbca538 100755
--- a/t/t9391/print_progress.py
+++ b/t/t9391/print_progress.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -21,8 +21,8 @@ commit_count = 0
 
 def print_progress():
   global object_count, commit_count, total_objects, total_commits
-  print "\rRewriting commits... %d/%d  (%d objects)" \
-        % (commit_count, total_commits, object_count),
+  print("\rRewriting commits... %d/%d  (%d objects)"
+        % (commit_count, total_commits, object_count), end='')
 
 def my_blob_callback(blob):
   global object_count
diff --git a/t/t9391/rename-master-to-develop.py b/t/t9391/rename-master-to-develop.py
index f92517a..1acfef8 100755
--- a/t/t9391/rename-master-to-develop.py
+++ b/t/t9391/rename-master-to-develop.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -14,8 +14,8 @@ not try to handle any such special cases.
 import git_filter_repo as fr
 
 def my_commit_callback(commit):
-  if commit.branch == "refs/heads/master":
-    commit.branch = "refs/heads/develop"
+  if commit.branch == b"refs/heads/master":
+    commit.branch = b"refs/heads/develop"
 
 args = fr.FilteringOptions.default_options()
 args.force = True
diff --git a/t/t9391/splice_repos.py b/t/t9391/splice_repos.py
index 00d0058..5993436 100755
--- a/t/t9391/splice_repos.py
+++ b/t/t9391/splice_repos.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -29,12 +29,12 @@ class InterleaveRepositories:
 
   def hold_commit(self, commit):
     commit.skip(new_id = commit.id)
-    letter = re.match('Commit (.)', commit.message).group(1)
+    letter = re.match(b'Commit (.)', commit.message).group(1)
     self.commit_map[letter] = commit
 
   def weave_commit(self, commit):
-    letter = re.match('Commit (.)', commit.message).group(1)
-    prev_letter = chr(ord(letter)-1)
+    letter = re.match(b'Commit (.)', commit.message).group(1)
+    prev_letter = bytes([ord(letter)-1])
 
     # Splice in any extra commits needed
     if prev_letter in self.commit_map:
@@ -53,10 +53,10 @@ class InterleaveRepositories:
       fr.record_id_rename(new_commit.id, commit.id)
 
   def run(self):
-    blob = fr.Blob('public gpg key contents')
-    tag = fr.Tag('gpg-pubkey', blob.id,
-                 'Ima Tagger', 'ima@tagg.er', '1136199845 +0300',
-                 'Very important explanation and stuff')
+    blob = fr.Blob(b'public gpg key contents')
+    tag = fr.Tag(b'gpg-pubkey', blob.id,
+                 b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300',
+                 b'Very important explanation and stuff')
 
     args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
     out = fr.RepoFilter(args)
diff --git a/t/t9391/strip-cvs-keywords.py b/t/t9391/strip-cvs-keywords.py
index 1067d55..ae7cda0 100755
--- a/t/t9391/strip-cvs-keywords.py
+++ b/t/t9391/strip-cvs-keywords.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 """
 Please see the
@@ -18,8 +18,8 @@ def strip_cvs_keywords(blob):
   # FIXME: Should first check if blob is a text file to avoid ruining
   # binaries.  Could use python.magic here, or just output blob.data to
   # the unix 'file' command
-  pattern = r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
-  replacement = r'$\1$'
+  pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
+  replacement = br'$\1$'
   blob.data = re.sub(pattern, replacement, blob.data)
 
 args = fr.FilteringOptions.parse_args(['--force'])
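strip-cvs-keywords.py above illustrates that Python 3's re module refuses to mix types: a bytes subject needs a bytes pattern and a bytes replacement, hence the br'...' literals. A self-contained check (sample data, abbreviated keyword list):

    import re

    pattern = br'\$(Id|Revision):.*\$'   # bytes pattern, since blob.data is bytes
    replacement = br'$\1$'               # backreference keeps just the keyword name
    data = b'/* $Id: foo.c,v 1.3 $ */'
    assert re.sub(pattern, replacement, data) == b'/* $Id$ */'
    # re.sub(pattern, r'$\1$', data) would raise TypeError instead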
diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py
index 3167c0f..190f82b 100755
--- a/t/t9391/unusual.py
+++ b/t/t9391/unusual.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Please: DO NOT USE THIS AS AN EXAMPLE.
 #
@@ -14,14 +14,14 @@ import collections
 import os
 import random
-import StringIO
+import io
 import sys
 import textwrap
 
 import git_filter_repo as fr
 
 def handle_progress(progress):
-  print("Decipher this: "+''.join(reversed(progress.message)))
+  print(b"Decipher this: "+bytes(reversed(progress.message)))
 
 def handle_checkpoint(checkpoint_object):
   # Flip a coin; see if we want to pass the checkpoint through.
@@ -44,13 +44,13 @@ def track_everything(obj):
   # projects, I'm just verifying an invariant of the current code.
   assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]
 
-mystr = 'This is the contents of the blob'
-compare = "Blob:\n  blob\n  mark :1\n  data {}\n  {}".format(len(mystr), mystr)
+mystr = b'This is the contents of the blob'
+compare = b"Blob:\n  blob\n  mark :1\n  data %d\n  %s" % (len(mystr), mystr)
 # Next line's only purpose is testing code coverage of something that helps
 # debugging git-filter-repo; it is NOT something external folks should depend
 # upon.
 myblob = fr.Blob(mystr)
-assert str(myblob) == compare
+assert bytes(myblob) == compare
 
 # Everyone should be using RepoFilter objects, not FastExportFilter.  But for
 # testing purposes...
 filter = fr.FastExportFilter('.',
@@ -58,8 +58,8 @@ filter = fr.FastExportFilter('.',
                              checkpoint_callback = handle_checkpoint,
                              everything_callback = track_everything)
-filter.run(input = sys.stdin,
-           output = open(os.devnull, 'w'),
+filter.run(input = sys.stdin.detach(),
+           output = open(os.devnull, 'bw'),
            fast_import_pipes = None,
            quiet = True)
 # DO NOT depend upon or use _IDS directly you external script writers.  I'm
@@ -71,7 +71,7 @@ print("Found {} blobs/commits and {} other objects"
       .format(total_objects['common'], total_objects['uncommon']))
 
-stream = StringIO.StringIO(textwrap.dedent('''
+stream = io.BytesIO(textwrap.dedent('''
   blob
   mark :1
   data 5
@@ -102,14 +102,14 @@ stream = StringIO.StringIO(textwrap.dedent('''
   from :3
   M 100644 :1 salutation
 
-  '''[1:]))
+  '''[1:]).encode())
 
 counts = collections.Counter()
 def look_for_reset(obj):
   print("Processing {}".format(obj))
   counts[type(obj)] += 1
   if type(obj) == fr.Reset:
-    assert obj.ref == 'refs/heads/B'
+    assert obj.ref == b'refs/heads/B'
 
 # Use all kinds of internals that external scripts should NOT use and which
 # are likely to break in the future, just to verify a few invariants...
diff --git a/t/t9392-python-callback.sh b/t/t9392-python-callback.sh
index 983879e..27c338c 100755
--- a/t/t9392-python-callback.sh
+++ b/t/t9392-python-callback.sh
@@ -51,7 +51,7 @@ test_expect_success '--filename-callback' '
 	setup filename-callback &&
 	(
 		cd filename-callback &&
-		git filter-repo --filename-callback "return None if filename.endswith(\".doc\") else \"src/\"+filename" &&
+		git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" &&
 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
 		! grep file.doc f &&
 		COMPARE=$(wc -l <f) &&
@@ -63,7 +63,7 @@ test_expect_success '--message-callback' '
 	setup message-callback &&
 	(
 		cd message-callback &&
-		git filter-repo --message-callback "return \"TLDR: \"+message[0:5]" &&
+		git filter-repo --message-callback "return b\"TLDR: \"+message[0:5]" &&
 		git log --format=%B >log-messages &&
 		grep TLDR:...... log-messages >modified-messages &&
 		test_line_count = 6 modified-messages
@@ -75,7 +75,7 @@ test_expect_success '--name-callback' '
 	setup name-callback &&
 	(
 		cd name-callback &&
-		git filter-repo --name-callback "return name.replace(\"N.\", \"And\")" &&
+		git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" &&
 		git log --format=%an >log-person-names &&
 		grep Copy.And.Paste log-person-names
 	)
@@ -85,7 +85,7 @@ test_expect_success '--email-callback' '
 	setup email-callback &&
 	(
 		cd email-callback &&
-		git filter-repo --email-callback "return email.replace(\".com\", \".org\")" &&
+		git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" &&
 		git log --format=%ae%n%ce >log-emails &&
 		! grep .com log-emails &&
 		grep .org log-emails
@@ -98,7 +98,7 @@ test_expect_success '--refname-callback' '
 		cd refname-callback &&
 		git filter-repo --refname-callback "
 		  dir,path = os.path.split(refname)
-		  return dir+\"/prefix-\"+path" &&
+		  return dir+b\"/prefix-\"+path" &&
 		git show-ref | grep refs/heads/prefix-master &&
 		git show-ref | grep refs/tags/prefix-v1.0 &&
 		git show-ref | grep refs/tags/prefix-v2.0
@@ -110,7 +110,7 @@ test_expect_success '--refname-callback sanity check' '
 	(
 		cd refname-sanity-check &&
-		test_must_fail git filter-repo --refname-callback "return re.sub(\"tags\", \"other-tags\", refname)" 2>../err &&
+		test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err &&
 		test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err &&
 		rm ../err
 	)
@@ -138,7 +138,7 @@ test_expect_success '--commit-callback' '
 		  commit.committer_email = commit.author_email
 		  commit.committer_date = commit.author_date
 		  for change in commit.file_changes:
-		    change.mode = \"100755\"
+		    change.mode = b\"100755\"
 		  " &&
 		git log --format=%ae%n%ce >log-emails &&
 		! grep committer@example.com log-emails &&
@@ -153,8 +153,8 @@ test_expect_success '--tag-callback' '
 	(
 		cd tag-callback &&
 		git filter-repo --tag-callback "
-		  tag.tagger_name = \"Dr. \"+tag.tagger_name
-		  tag.message = \"Awesome sauce \"+tag.message
+		  tag.tagger_name = b\"Dr. \"+tag.tagger_name
+		  tag.message = b\"Awesome sauce \"+tag.message
 		  " &&
 		git cat-file -p v2.0 | grep ^tagger.Dr\\. &&
 		git cat-file -p v2.0 | grep ^Awesome.sauce.Super
@@ -175,7 +175,7 @@ test_expect_success 'callback has return statement sanity check' '
 	(
 		cd callback_return_sanity &&
-		test_must_fail git filter-repo --filename-callback "filename + \".txt\"" 2>../err&&
+		test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&&
 		test_i18ngrep "Error: --filename-callback should have a return statement" ../err &&
 		rm ../err
 	)
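The t9392 changes are the user-visible face of the port: the snippet handed to any --*-callback option now receives bytes and must return bytes, so string literals inside those shell-quoted one-liners need a b prefix. Roughly what happens with the --name-callback body above (the wrapper function here is illustrative; filter-repo generates its own internally):

    # filter-repo effectively compiles the callback body into:
    def name_callback(name):
        return name.replace(b"N.", b"And")

    assert name_callback(b"Copy N. Paste") == b"Copy And Paste"

Returning a str from one of these callbacks would no longer match the bytes-based fast-export stream, which is why every literal in the tests gains the b prefix.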