From 35052f673d314eae542926dce393d4b77fe4ff26 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 15:18:59 -0700 Subject: [PATCH] filter-repo (python3): replace strings with bytestrings This is by far the largest python3 change; it consists basically of * using b'' instead of '' in lots of places * adding a .encode() if we really do work with a string but need to get it converted to a bytestring * replace uses of .format() with interpolation via the '%' operator, since bytestrings don't have a .format() method. Signed-off-by: Elijah Newren --- git-filter-repo | 636 ++++++++++++++------------- t/t9390-filter-repo.sh | 10 +- t/t9391/commit_info.py | 10 +- t/t9391/create_fast_export_output.py | 94 ++-- t/t9391/file_filter.py | 4 +- t/t9391/rename-master-to-develop.py | 4 +- t/t9391/splice_repos.py | 12 +- t/t9391/strip-cvs-keywords.py | 4 +- t/t9391/unusual.py | 10 +- t/t9392-python-callback.sh | 20 +- 10 files changed, 408 insertions(+), 396 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 19742a1..e66e27e 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -87,12 +87,12 @@ class FixedTimeZone(tzinfo): Fixed offset in minutes east from UTC. """ - tz_re = re.compile(r'^([-+]?)(\d\d)(\d\d)$') + tz_re = re.compile(br'^([-+]?)(\d\d)(\d\d)$') def __init__(self, offset_string): tzinfo.__init__(self) sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups() - factor = -1 if (sign and sign == '-') else 1 + factor = -1 if (sign and sign == b'-') else 1 self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm))) self._offset_string = offset_string @@ -112,8 +112,8 @@ def string_to_date(datestring): def date_to_string(dateobj): epoch = datetime.fromtimestamp(0, dateobj.tzinfo) - return('{} {}'.format(int(_timedelta_to_seconds(dateobj - epoch)), - dateobj.tzinfo.tzname(0))) + return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)), + dateobj.tzinfo.tzname(0))) def decode(bytestr): 'Try to convert bytestr to utf-8 for outputting as an error message.' 
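The hunks above already show the first and third patterns named in the commit message (a bytes regex literal and %-interpolation replacing .format()). For reference, a minimal standalone sketch of all three patterns might look like the following; the names used here are purely illustrative and are not taken from git-filter-repo:

    import io
    import re

    # 1. bytes literals (and bytes regexes) where str literals were used before
    branch = b'refs/heads/master'
    mark_re = re.compile(br'mark :(\d+)\n$')

    def write_commit_header(out, branch, mark, author):
        # 2. .encode() when we genuinely start from a str but must emit bytes
        note = ('created by %s' % author).encode()
        # 3. %-interpolation on bytes, since bytestrings have no .format()
        out.write(b'commit %s\nmark :%d\n# %s\n' % (branch, mark, note))

    buf = io.BytesIO()
    write_commit_header(buf, branch, 42, 'Elijah')
    assert mark_re.search(b'mark :42\n')
    assert buf.getvalue().startswith(b'commit refs/heads/master\nmark :42\n')

Note that %-formatting of bytes (b'%s', b'%d') was added in Python 3.5 (PEP 461), which is why it can stand in for .format() throughout this patch.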
@@ -147,21 +147,21 @@ def glob_to_regex(glob_bytestr): return regex.encode() class PathQuoting: - _unescape = {'a': '\a', - 'b': '\b', - 'f': '\f', - 'n': '\n', - 'r': '\r', - 't': '\t', - 'v': '\v', - '"': '"', - '\\':'\\'} - _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') + _unescape = {b'a': b'\a', + b'b': b'\b', + b'f': b'\f', + b'n': b'\n', + b'r': b'\r', + b't': b'\t', + b'v': b'\v', + b'"': b'"', + b'\\':b'\\'} + _unescape_re = re.compile(br'\\([a-z"\\]|[0-9]{3})') _escape = [bytes([x]) for x in range(127)]+[ - '\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] + b'\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: - _escape[ord(x)] = '\\'+_reverse[x] + _escape[ord(x)] = b'\\'+_reverse[x] _special_chars = [len(x) > 1 for x in _escape] @staticmethod @@ -171,8 +171,8 @@ class PathQuoting: @staticmethod def dequote(quoted_string): - if quoted_string.startswith('"'): - assert quoted_string.endswith('"') + if quoted_string.startswith(b'"'): + assert quoted_string.endswith(b'"') return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence, quoted_string[1:-1]) return quoted_string @@ -183,9 +183,9 @@ class PathQuoting: # pqsc = PathQuoting._special_chars # if any(pqsc[x] for x in set(unquoted_string)): # Option 2, perf hack: do minimal amount of quoting required by fast-import - if unquoted_string.startswith('"') or '\n' in unquoted_string: + if unquoted_string.startswith(b'"') or b'\n' in unquoted_string: pqe = PathQuoting._escape - return '"' + ''.join(pqe[x] for x in unquoted_string) + '"' + return b'"' + b''.join(pqe[x] for x in unquoted_string) + b'"' return unquoted_string class AncestryGraph(object): @@ -263,8 +263,8 @@ class MailmapInfo(object): self._parse_file(filename) def _parse_file(self, filename): - name_and_email_re = re.compile(r'(.*?)\s*<([^>]+)>\s*') - comment_re = re.compile(r'\s*#.*') + name_and_email_re = re.compile(br'(.*?)\s*<([^>]+)>\s*') + comment_re = re.compile(br'\s*#.*') if not os.access(filename, os.R_OK): raise SystemExit(_("Cannot read %s") % decode(filename)) with open(filename, 'br') as f: @@ -273,7 +273,7 @@ class MailmapInfo(object): count += 1 err = "Unparseable mailmap file: line #{} is bad: {}".format(count, line) # Remove comments - line = comment_re.sub('', line) + line = comment_re.sub(b'', line) # Remove leading and trailing whitespace line = line.strip() if not line: @@ -443,7 +443,8 @@ class _GitElement(object): output_lines = writeme.getvalue().splitlines() writeme.close() self.dumped = old_dumped - return "{}:\n {}".format(type(self).__name__, "\n ".join(output_lines)) + return b"%s:\n %s" % (type(self).__name__.encode(), + b"\n ".join(output_lines)) def skip(self, new_id=None): """ @@ -491,6 +492,7 @@ class Blob(_GitElementWithId): self.original_id = original_id # Stores the blob's data + assert(type(data) == bytes) self.data = data def dump(self, file_): @@ -499,10 +501,10 @@ class Blob(_GitElementWithId): """ self.dumped = 1 - file_.write('blob\n') - file_.write('mark :%d\n' % self.id) - file_.write('data %d\n%s' % (len(self.data), self.data)) - file_.write('\n') + file_.write(b'blob\n') + file_.write(b'mark :%d\n' % self.id) + file_.write(b'data %d\n%s' % (len(self.data), self.data)) + file_.write(b'\n') class Reset(_GitElement): @@ -530,10 +532,10 @@ class Reset(_GitElement): """ self.dumped = 1 - file_.write('reset %s\n' % self.ref) + file_.write(b'reset %s\n' % self.ref) if self.from_ref: - file_.write('from :%d\n' % self.from_ref) - 
file_.write('\n') + file_.write(b'from :%d\n' % self.from_ref) + file_.write(b'\n') class FileChanges(_GitElement): """ @@ -544,7 +546,10 @@ class FileChanges(_GitElement): def __init__(self, type_, filename, id_ = None, mode = None): _GitElement.__init__(self) - # Denote the type of file-change (M for modify, D for delete, etc) + # Denote the type of file-change (b'M' for modify, b'D' for delete, etc) + # We could + # assert(type(type_) == bytes) + # here but I don't just due to worries about performance overhead... self.type = type_ # Record the name of the file being changed @@ -557,15 +562,15 @@ class FileChanges(_GitElement): # blob_id is the id (mark) of the affected blob self.blob_id = None - # For 'M' file changes (modify), expect to have id and mode - if type_ == 'M': + # For b'M' file changes (modify), expect to have id and mode + if type_ == b'M': if mode is None: raise SystemExit(_("file mode and idnum needed for %s") % filename) # pragma: no cover self.mode = mode self.blob_id = id_ - # For 'R' file changes (rename), expect to have newname as third arg - elif type_ == 'R': # pragma: no cover (now avoid fast-export renames) + # For b'R' file changes (rename), expect to have newname as third arg + elif type_ == b'R': # pragma: no cover (now avoid fast-export renames) if id_ is None: raise SystemExit(_("new name needed for rename of %s") % filename) self.filename = (self.filename, id_) @@ -574,17 +579,17 @@ class FileChanges(_GitElement): """ Write this file-change element to a file """ - skipped_blob = (self.type == 'M' and self.blob_id is None) + skipped_blob = (self.type == b'M' and self.blob_id is None) if skipped_blob: return self.dumped = 1 quoted_filename = PathQuoting.enquote(self.filename) - if self.type == 'M' and isinstance(self.blob_id, int): - file_.write('M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'M': - file_.write('M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'D': - file_.write('D %s\n' % quoted_filename) + if self.type == b'M' and isinstance(self.blob_id, int): + file_.write(b'M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'M': + file_.write(b'M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'D': + file_.write(b'D %s\n' % quoted_filename) else: raise SystemExit(_("Unhandled filechange type: %s") % self.type) # pragma: no cover @@ -648,16 +653,16 @@ class Commit(_GitElementWithId): # Make output to fast-import slightly easier for humans to read if the # message has no trailing newline of its own; cosmetic, but a nice touch... 
- extra_newline = '\n' - if self.message.endswith('\n') or not (self.parents or self.file_changes): - extra_newline = '' + extra_newline = b'\n' + if self.message.endswith(b'\n') or not (self.parents or self.file_changes): + extra_newline = b'' - file_.write(('commit {}\n' - 'mark :{}\n' - 'author {} <{}> {}\n' - 'committer {} <{}> {}\n' - 'data {}\n{}{}' - ).format( + file_.write((b'commit %s\n' + b'mark :%d\n' + b'author %s <%s> %s\n' + b'committer %s <%s> %s\n' + b'data %d\n%s%s' + ) % ( self.branch, self.id, self.author_name, self.author_email, self.author_date, self.committer_name, self.committer_email, self.committer_date, @@ -665,16 +670,18 @@ class Commit(_GitElementWithId): extra_newline) ) for i, parent in enumerate(self.parents): - mark = ':' if isinstance(parent, int) else '' - file_.write('from ' if i==0 else 'merge ') - file_.write('{}{}\n'.format(mark, parent)) + file_.write(b'from ' if i==0 else b'merge ') + if isinstance(parent, int): + file_.write(b':%d\n' % parent) + else: + file_.write(b'%s\n' % parent) for change in self.file_changes: change.dump(file_) if not self.parents and not self.file_changes: # Workaround a bug in pre-git-2.22 versions of fast-import with # the get-mark directive. - file_.write('\n') - file_.write('\n') + file_.write(b'\n') + file_.write(b'\n') def first_parent(self): """ @@ -729,15 +736,15 @@ class Tag(_GitElement): self.dumped = 1 - file_.write('tag %s\n' % self.ref) - mark = ':' if isinstance(self.from_ref, int) else '' - file_.write('from {}{}\n'.format(mark, self.from_ref)) + file_.write(b'tag %s\n' % self.ref) + markfmt = b'from :%d\n' if isinstance(self.from_ref, int) else 'from %s\n' + file_.write(markfmt % self.from_ref) if self.tagger_name: - file_.write('tagger %s <%s> ' % (self.tagger_name, self.tagger_email)) + file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email)) file_.write(self.tagger_date) - file_.write('\n') - file_.write('data %d\n%s' % (len(self.message), self.message)) - file_.write('\n') + file_.write(b'\n') + file_.write(b'data %d\n%s' % (len(self.message), self.message)) + file_.write(b'\n') class Progress(_GitElement): """ @@ -761,8 +768,8 @@ class Progress(_GitElement): """ self.dumped = 1 - file_.write('progress %s\n' % self.message) - file_.write('\n') + file_.write(b'progress %s\n' % self.message) + file_.write(b'\n') class Checkpoint(_GitElement): """ @@ -784,8 +791,8 @@ class Checkpoint(_GitElement): """ self.dumped = 1 - file_.write('checkpoint\n') - file_.write('\n') + file_.write(b'checkpoint\n') + file_.write(b'\n') class LiteralCommand(_GitElement): """ @@ -910,20 +917,20 @@ class FastExportFilter(object): self._files_tweaked = set() # Compile some regexes and cache those - self._mark_re = re.compile(r'mark :(\d+)\n$') + self._mark_re = re.compile(br'mark :(\d+)\n$') self._parent_regexes = {} - parent_regex_rules = ('{} :(\d+)\n$', '{} ([0-9a-f]{{40}})\n') - for parent_refname in ('from', 'merge'): - ans = [re.compile(x.format(parent_refname)) for x in parent_regex_rules] + parent_regex_rules = (b' :(\d+)\n$', b' ([0-9a-f]{40})\n') + for parent_refname in (b'from', b'merge'): + ans = [re.compile(parent_refname+x) for x in parent_regex_rules] self._parent_regexes[parent_refname] = ans - self._quoted_string_re = re.compile(r'"(?:[^"\\]|\\.)*"') + self._quoted_string_re = re.compile(br'"(?:[^"\\]|\\.)*"') self._refline_regexes = {} - for refline_name in ('reset', 'commit', 'tag', 'progress'): - self._refline_regexes[refline_name] = re.compile(refline_name+' (.*)\n$') + for refline_name in 
(b'reset', b'commit', b'tag', b'progress'): + self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\n$') self._user_regexes = {} - for user in ('author', 'committer', 'tagger'): - self._user_regexes[user] = re.compile(user + ' (.*?) <(.*?)> (.*)\n$') - self._hash_re = re.compile(r'(\b[0-9a-f]{7,40}\b)') + for user in (b'author', b'committer', b'tagger'): + self._user_regexes[user] = re.compile(user + b' (.*?) <(.*?)> (.*)\n$') + self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)') def _advance_currentline(self): """ @@ -971,51 +978,51 @@ class FastExportFilter(object): """ If the current line contains a file-change object, then parse it and advance the current line; otherwise return None. We only care - about file changes of type 'M' and 'D' (these are the only types + about file changes of type b'M' and b'D' (these are the only types of file-changes that fast-export will provide). """ filechange = None changetype = self._currentline[0:1] - if changetype == 'M': + if changetype == b'M': (changetype, mode, idnum, path) = self._currentline.split(None, 3) - if idnum[0:1] == ':': + if idnum[0:1] == b':': idnum = idnum[1:] - path = path.rstrip('\n') + path = path.rstrip(b'\n') # We translate the idnum to our id system if len(idnum) != 40: idnum = _IDS.translate( int(idnum) ) if idnum is not None: - if path.startswith('"'): + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('M', path, idnum, mode) + filechange = FileChanges(b'M', path, idnum, mode) else: - filechange = 'skipped' + filechange = b'skipped' self._advance_currentline() - elif changetype == 'D': + elif changetype == b'D': (changetype, path) = self._currentline.split(None, 1) - path = path.rstrip('\n') - if path.startswith('"'): + path = path.rstrip(b'\n') + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('D', path) + filechange = FileChanges(b'D', path) self._advance_currentline() - elif changetype == 'R': # pragma: no cover (now avoid fast-export renames) + elif changetype == b'R': # pragma: no cover (now avoid fast-export renames) rest = self._currentline[2:-1] - if rest.startswith('"'): + if rest.startswith(b'"'): m = self._quoted_string_re.match(rest) if not m: raise SystemExit(_("Couldn't parse rename source")) orig = PathQuoting.dequote(m.group(0)) new = rest[m.end()+1:] else: - orig, new = rest.split(' ', 1) - if new.startswith('"'): + orig, new = rest.split(b' ', 1) + if new.startswith(b'"'): new = PathQuoting.dequote(new) - filechange = FileChanges('R', orig, new) + filechange = FileChanges(b'R', orig, new) self._advance_currentline() return filechange def _parse_original_id(self): - original_id = self._currentline[len('original-oid '):].rstrip() + original_id = self._currentline[len(b'original-oid '):].rstrip() self._advance_currentline() return original_id @@ -1049,8 +1056,8 @@ class FastExportFilter(object): # fast-import will not choke on. Let's do that. Note that +051800 # seems to be the only weird timezone found in the wild, by me or some # other posts google returned on the subject... - if when.endswith('+051800'): - when = when[0:-7]+'+0261' + if when.endswith(b'+051800'): + when = when[0:-7]+b'+0261' self._advance_currentline() return (name, email, when) @@ -1061,11 +1068,11 @@ class FastExportFilter(object): the data. 
""" fields = self._currentline.split() - assert fields[0] == 'data' + assert fields[0] == b'data' size = int(fields[1]) data = self._input.read(size) self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() return data @@ -1082,11 +1089,11 @@ class FastExportFilter(object): id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); data = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the blob @@ -1117,9 +1124,9 @@ class FastExportFilter(object): the callback). """ # Parse the Reset - ref = self._parse_ref_line('reset') - ignoreme, from_ref = self._parse_optional_parent_ref('from') - if self._currentline == '\n': + ref = self._parse_ref_line(b'reset') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') + if self._currentline == b'\n': self._advance_currentline() # fast-export likes to print extraneous resets that serve no purpose. @@ -1342,19 +1349,19 @@ class FastExportFilter(object): for change in commit.file_changes: parent = new_1st_parent or commit.parents[0] # exists due to above checks quoted_filename = PathQuoting.enquote(change.filename) - self._output.write("ls :{} {}\n".format(parent, quoted_filename)) + self._output.write(b"ls :%d %s\n" % (parent, quoted_filename)) self._output.flush() parent_version = fi_output.readline().split() - if change.type == 'D': - if parent_version != ['missing', quoted_filename]: + if change.type == b'D': + if parent_version != [b'missing', quoted_filename]: return False else: blob_sha = change.blob_id if isinstance(change.blob_id, int): - self._output.write("get-mark :{}\n".format(change.blob_id)) + self._output.write(b"get-mark :%d\n" % change.blob_id) self._output.flush() blob_sha = fi_output.readline().rstrip() - if parent_version != [change.mode, 'blob', blob_sha, quoted_filename]: + if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]: return False return True @@ -1364,7 +1371,7 @@ class FastExportFilter(object): # Record the mapping of old commit hash to new one if commit.original_id and self._fast_import_pipes: fi_input, fi_output = self._fast_import_pipes - self._output.write("get-mark :{}\n".format(commit.id)) + self._output.write(b"get-mark :%d\n" % commit.id) self._output.flush() orig_id = commit.original_id self._commit_short_old_hashes[orig_id[0:7]].add(orig_id) @@ -1390,19 +1397,19 @@ class FastExportFilter(object): """ # Parse the Commit. This may look involved, but it's pretty simple; it only # looks bad because a commit object contains many pieces of data. 
- branch = self._parse_ref_line('commit') + branch = self._parse_ref_line(b'commit') id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); author_name = None - if self._currentline.startswith('author'): - (author_name, author_email, author_date) = self._parse_user('author') + if self._currentline.startswith(b'author'): + (author_name, author_email, author_date) = self._parse_user(b'author') (committer_name, committer_email, committer_date) = \ - self._parse_user('committer') + self._parse_user(b'committer') if not author_name: (author_name, author_email, author_date) = \ @@ -1411,12 +1418,12 @@ class FastExportFilter(object): commit_msg = self._parse_data() commit_msg = self._hash_re.sub(self._translate_commit_hash, commit_msg) - pinfo = [self._parse_optional_parent_ref('from')] + pinfo = [self._parse_optional_parent_ref(b'from')] # Due to empty pruning, we can have real 'from' and 'merge' lines that # due to commit rewriting map to a parent of None. We need to record # 'from' if its non-None, and we need to parse all 'merge' lines. - while self._currentline.startswith('merge '): - pinfo.append(self._parse_optional_parent_ref('merge')) + while self._currentline.startswith(b'merge '): + pinfo.append(self._parse_optional_parent_ref(b'merge')) orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)] # No parents is oddly represented as [None] instead of [], due to the @@ -1434,10 +1441,10 @@ class FastExportFilter(object): file_change = self._parse_optional_filechange() had_file_changes = file_change is not None while file_change: - if not (type(file_change) == str and file_change == 'skipped'): + if not (type(file_change) == bytes and file_change == b'skipped'): file_changes.append(file_change) file_change = self._parse_optional_filechange() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Okay, now we can finally create the Commit object @@ -1510,18 +1517,18 @@ class FastExportFilter(object): the callback). """ # Parse the Tag - tag = self._parse_ref_line('tag') - ignoreme, from_ref = self._parse_optional_parent_ref('from') + tag = self._parse_ref_line(b'tag') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); tagger_name, tagger_email, tagger_date = None, None, None - if self._currentline.startswith('tagger'): - (tagger_name, tagger_email, tagger_date) = self._parse_user('tagger') + if self._currentline.startswith(b'tagger'): + (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger') tag_msg = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the tag @@ -1544,7 +1551,7 @@ class FastExportFilter(object): tag.dump(self._output) # Record the fact that this tag was seen so we don't nuke it as part # of refs_to_nuke. - full_ref = 'refs/tags/{}'.format(tag.ref) + full_ref = b'refs/tags/' + tag.ref self._seen_refs[full_ref] = None def _parse_progress(self): @@ -1556,8 +1563,8 @@ class FastExportFilter(object): everything else is done (unless it has been skipped by the callback). 
""" # Parse the Progress - message = self._parse_ref_line('progress') - if self._currentline == '\n': + message = self._parse_ref_line(b'progress') + if self._currentline == b'\n': self._advance_currentline() # Create the progress message @@ -1585,7 +1592,7 @@ class FastExportFilter(object): """ # Parse the Checkpoint self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the checkpoint @@ -1632,16 +1639,17 @@ class FastExportFilter(object): reset.dump(self._output) def record_metadata(self, metadata_dir, orig_refs, refs_nuked): - deleted_hash = '0'*40 + deleted_hash = b'0'*40 self._flush_renames() - with open(os.path.join(metadata_dir, 'commit-map'), 'bw') as f: - f.write("%-40s %s\n" % (_("old"), _("new"))) + with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f: + f.write(("%-40s %s\n" % (_("old"), _("new"))).encode()) for (old,new) in self._commit_renames.items(): - f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) + msg = b'%s %s\n' % (old, new if new != None else deleted_hash) + f.write(msg) batch_check_process = None - batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') - with open(os.path.join(metadata_dir, 'ref-map'), 'bw') as f: + batch_check_output_re = re.compile(b'^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') + with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f: for refname, old_hash in orig_refs.items(): if refname in refs_nuked: new_hash = deleted_hash @@ -1655,22 +1663,22 @@ class FastExportFilter(object): stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=self._repo_working_dir) - batch_check_process.stdin.write(refname+"\n") + batch_check_process.stdin.write(refname+b"\n") batch_check_process.stdin.flush() line = batch_check_process.stdout.readline() m = batch_check_output_re.match(line) - if not m or m.group(2) != 'tag': + if not m or m.group(2) != b'tag': raise SystemExit(_("Failed to find new id for %(refname)s " "(old id was %(old_hash)s)") % ({'refname': refname, 'old_hash': old_hash}) ) # pragma: no cover new_hash = m.group(1) - f.write('{} {} {}\n'.format(old_hash, new_hash, refname)) + f.write(b'%s %s %s\n' % (old_hash, new_hash, refname)) if batch_check_process: batch_check_process.stdin.close() batch_check_process.wait() - with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'bw') as f: + with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f: issues_found = False if self._commits_no_longer_merges: issues_found = True @@ -1680,10 +1688,10 @@ class FastExportFilter(object): are now regular commits; they likely have suboptimal commit messages (e.g. "Merge branch next into master"). Original commit hash on the left, commit hash after filtering/rewriting on the right: - ''')[1:])) + ''')[1:]).encode()) for oldhash, newhash in self._commits_no_longer_merges: - f.write(' {} {}\n'.format(oldhash, newhash)) - f.write('\n') + f.write(' {} {}\n'.format(oldhash, newhash).encode()) + f.write(b'\n') if self._commits_referenced_but_removed: issues_found = True @@ -1691,16 +1699,16 @@ class FastExportFilter(object): The following commits were filtered out, but referenced in another commit message. 
The reference to the now-nonexistent commit hash (or a substring thereof) was left as-is in any commit messages: - ''')[1:])) + ''')[1:]).encode()) for bad_commit_reference in self._commits_referenced_but_removed: - f.write(' {}\n'.format(bad_commit_reference)) - f.write('\n') + f.write(' {}\n'.format(bad_commit_reference).encode()) + f.write(b'\n') if not issues_found: - f.write(_("No filtering problems encountered.")) + f.write(_("No filtering problems encountered.\n").encode()) - with open(os.path.join(metadata_dir, 'already_ran'), 'bw') as f: - f.write(_("This file exists to allow you to filter again without --force.")) + with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f: + f.write(_("This file exists to allow you to filter again without --force.\n").encode()) def get_seen_refs(self): return self._seen_refs.keys() @@ -1718,30 +1726,30 @@ class FastExportFilter(object): # Run over the input and do the filtering self._advance_currentline() while self._currentline: - if self._currentline.startswith('blob'): + if self._currentline.startswith(b'blob'): self._parse_blob() - elif self._currentline.startswith('reset'): + elif self._currentline.startswith(b'reset'): self._parse_reset() - elif self._currentline.startswith('commit'): + elif self._currentline.startswith(b'commit'): self._parse_commit() - elif self._currentline.startswith('tag'): + elif self._currentline.startswith(b'tag'): self._parse_tag() - elif self._currentline.startswith('progress'): + elif self._currentline.startswith(b'progress'): self._parse_progress() - elif self._currentline.startswith('checkpoint'): + elif self._currentline.startswith(b'checkpoint'): self._parse_checkpoint() - elif self._currentline.startswith('feature'): + elif self._currentline.startswith(b'feature'): self._parse_literal_command() - elif self._currentline.startswith('option'): + elif self._currentline.startswith(b'option'): self._parse_literal_command() - elif self._currentline.startswith('done'): + elif self._currentline.startswith(b'done'): self._handle_final_commands() self._parse_literal_command() - elif self._currentline.startswith('#'): + elif self._currentline.startswith(b'#'): self._parse_literal_command() - elif self._currentline.startswith('get-mark') or \ - self._currentline.startswith('cat-blob') or \ - self._currentline.startswith('ls'): + elif self._currentline.startswith(b'get-mark') or \ + self._currentline.startswith(b'cat-blob') or \ + self._currentline.startswith(b'ls'): raise SystemExit(_("Unsupported command: '%s'") % self._currentline) else: raise SystemExit(_("Could not parse line: '%s'") % self._currentline) @@ -1798,13 +1806,13 @@ class GitUtils(object): def is_repository_bare(repo_working_dir): out = subprocess.check_output('git rev-parse --is-bare-repository'.split(), cwd=repo_working_dir) - return (out.strip() == 'true') + return (out.strip() == b'true') @staticmethod def determine_git_dir(repo_working_dir): d = subprocess.check_output('git rev-parse --git-dir'.split(), cwd=repo_working_dir).strip() - if repo_working_dir=='.' or d.startswith('/'): + if repo_working_dir==b'.' 
or d.startswith(b'/'): return d return os.path.join(repo_working_dir, d) @@ -1841,12 +1849,12 @@ class FilteringOptions(object): def __call__(self, parser, namespace, values, option_string=None): af = FilteringOptions.AppendFilter(dest='path_changes', option_strings=None) - dirname = values if values[-1] == '/' else values+'/' + dirname = values if values[-1] == b'/' else values+b'/' if option_string == '--subdirectory-filter': af(parser, namespace, dirname, '--path-match') - af(parser, namespace, dirname+':', '--path-rename') + af(parser, namespace, dirname+b':', '--path-rename') elif option_string == '--to-subdirectory-filter': - af(parser, namespace, ':'+dirname, '--path-rename') + af(parser, namespace, b':'+dirname, '--path-rename') else: raise SystemExit(_("Error: HelperFilter given invalid option_string: %s") % option_string) # pragma: no cover @@ -2047,7 +2055,7 @@ class FilteringOptions(object): stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p.wait() output = p.stdout.read() - if '--combined-all-paths' not in output: + if b'--combined-all-paths' not in output: raise SystemExit(_("Error: need a version of git whose diff-tree command " "has the --combined-all-paths option")) # pragma: no cover @@ -2057,24 +2065,24 @@ class FilteringOptions(object): replace_regexes = [] with open(filename, 'br') as f: for line in f: - line = line.rstrip('\r\n') + line = line.rstrip(b'\r\n') # Determine the replacement - replacement = '***REMOVED***' - if '==>' in line: - line, replacement = line.rsplit('==>', 1) + replacement = b'***REMOVED***' + if b'==>' in line: + line, replacement = line.rsplit(b'==>', 1) # See if we need to match via regex regex = None - if line.startswith('regex:'): + if line.startswith(b'regex:'): regex = line[6:] - elif line.startswith('glob:'): + elif line.startswith(b'glob:'): regex = glob_to_regex(line[5:]) if regex: replace_regexes.append((re.compile(regex), replacement)) else: # Otherwise, find the literal we need to replace - if line.startswith('literal:'): + if line.startswith(b'literal:'): line = line[8:] if not line: continue @@ -2149,7 +2157,7 @@ class RepoAnalyze(object): # Figure out kind of deletions to undo for this file, and update lists # of all-names-by-sha and all-filenames delmode = 'tree_deletions' - if mode != '040000': + if mode != b'040000': delmode = 'file_deletions' stats['names'][sha].add(filename) stats['allnames'].add(filename) @@ -2179,22 +2187,22 @@ class RepoAnalyze(object): graph.add_commit_and_parents(commit, parents) for change in file_changes: modes, shas, change_types, filenames = change - if len(parents) == 1 and change_types.startswith('R'): - change_types = 'R' # remove the rename score; we don't care - if modes[-1] == '160000': + if len(parents) == 1 and change_types.startswith(b'R'): + change_types = b'R' # remove the rename score; we don't care + if modes[-1] == b'160000': continue - elif modes[-1] == '000000': + elif modes[-1] == b'000000': # Track when files/directories are deleted for f in RepoAnalyze.equiv_class(stats, filenames[-1]): - if any(x == '040000' for x in modes[0:-1]): + if any(x == b'040000' for x in modes[0:-1]): stats['tree_deletions'][f] = date else: stats['file_deletions'][f] = date - elif change_types.strip('AMT') == '': + elif change_types.strip(b'AMT') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) - elif modes[-1] == '040000' and change_types.strip('RAM') == '': + elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, 
modes, shas, filenames) - elif change_types.strip('RAM') == '': + elif change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) RepoAnalyze.handle_renames(stats, commit, change_types, filenames) else: @@ -2219,7 +2227,7 @@ class RepoAnalyze(object): for line in cf.stdout: sha, objtype, objsize, objdisksize = line.split() objsize, objdisksize = int(objsize), int(objdisksize) - if objtype == 'blob': + if objtype == b'blob': unpacked_size[sha] = objsize packed_size[sha] = objdisksize num_blobs += 1 @@ -2269,17 +2277,17 @@ class RepoAnalyze(object): if cont and not line: cont = False for line in f: - if not line.startswith(':'): + if not line.startswith(b':'): cont = True break n = 1+max(1, len(parents)) - assert line.startswith(':'*(n-1)) + assert line.startswith(b':'*(n-1)) relevant = line[n-1:-1] splits = relevant.split(None, n) modes = splits[0:n] splits = splits[n].split(None, n) shas = splits[0:n] - splits = splits[n].split('\t') + splits = splits[n].split(b'\t') change_types = splits[0] filenames = [PathQuoting.dequote(x) for x in splits[1:]] file_changes.append([modes, shas, change_types, filenames]) @@ -2304,13 +2312,13 @@ class RepoAnalyze(object): @staticmethod def write_report(reportdir, stats): def datestr(datetimestr): - return datetimestr if datetimestr else _('') + return datetimestr if datetimestr else _('').encode() def dirnames(path): while True: path = os.path.dirname(path) yield path - if path == '': + if path == b'': break # Compute aggregate size information for paths, extensions, and dirs @@ -2352,27 +2360,27 @@ class RepoAnalyze(object): for name in dir_size['packed']: dir_deleted_data[name] = stats['tree_deletions'].get(name, None) - with open(os.path.join(reportdir, "README"), 'bw') as f: + with open(os.path.join(reportdir, b"README"), 'bw') as f: # Give a basic overview of this file - f.write("== %s ==\n" % _("Overall Statistics")) - f.write(" %s: %d\n" % (_("Number of commits"), - stats['num_commits'])) - f.write(" %s: %d\n" % (_("Number of filenames"), - len(path_size['packed']))) - f.write(" %s: %d\n" % (_("Number of directories"), - len(dir_size['packed']))) - f.write(" %s: %d\n" % (_("Number of file extensions"), - len(ext_size['packed']))) - f.write("\n") - f.write(" %s: %d\n" % (_("Total unpacked size (bytes)"), - total_size['unpacked'])) - f.write(" %s: %d\n" % (_("Total packed size (bytes)"), - total_size['packed'])) - f.write("\n") + f.write(b"== %s ==\n" % _("Overall Statistics").encode()) + f.write((" %s: %d\n" % (_("Number of commits"), + stats['num_commits'])).encode()) + f.write((" %s: %d\n" % (_("Number of filenames"), + len(path_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of directories"), + len(dir_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of file extensions"), + len(ext_size['packed']))).encode()) + f.write(b"\n") + f.write((" %s: %d\n" % (_("Total unpacked size (bytes)"), + total_size['unpacked'])).encode()) + f.write((" %s: %d\n" % (_("Total packed size (bytes)"), + total_size['packed'])).encode()) + f.write(b"\n") # Mention issues with the report - f.write("== %s ==\n" % _("Caveats")) - f.write("=== %s ===\n" % _("Sizes")) + f.write(("== %s ==\n" % _("Caveats")).encode()) + f.write(("=== %s ===\n" % _("Sizes")).encode()) f.write(textwrap.dedent(_(""" Packed size represents what size your repository would be if no trees, commits, tags, or other metadata were included (though it may @@ -2400,9 +2408,9 @@ class RepoAnalyze(object): ever reverted to a previous 
version's contents, the previous version's size will be counted multiple times in this analysis, even though git will only store it once. - """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Deletions")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Deletions")).encode()) f.write(textwrap.dedent(_(""" Whether a file is deleted is not a binary quality, since it can be deleted on some branches but still exist in others. Also, it might @@ -2418,9 +2426,9 @@ class RepoAnalyze(object): stream that mentions the file lists it as deleted. This makes it dependent on topological ordering, but generally gives the "right" answer. - """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Renames")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Renames")).encode()) f.write(textwrap.dedent(_(""" Renames share the same non-binary nature that deletions do, plus additional challenges: @@ -2436,101 +2444,105 @@ class RepoAnalyze(object): * The ability for users to rename files differently in different branches means that our chains of renames will not necessarily be linear but may branch out. - """)[1:])) - f.write("\n") + """)[1:]).encode()) + f.write(b"\n") # Equivalence classes for names, so if folks only want to keep a # certain set of paths, they know the old names they want to include # too. - with open(os.path.join(reportdir, "renames.txt"), 'bw') as f: + with open(os.path.join(reportdir, b"renames.txt"), 'bw') as f: seen = set() for pathname,equiv_group in sorted(stats['equivalence'].items(), key=lambda x:(x[1], x[0])): if equiv_group in seen: continue seen.add(equiv_group) - f.write("{} ->\n ".format(decode(equiv_group[0])) + + f.write(("{} ->\n ".format(decode(equiv_group[0])) + "\n ".join(decode(x) for x in equiv_group[1:]) + - "\n") + "\n").encode()) # List directories in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) + with open(os.path.join(reportdir, b"directories-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted directories by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (dir_deleted_data[dirname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _('').encode())) - with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) + with open(os.path.join(reportdir, b"directories-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All directories by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(""))) + 
f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _("").encode())) # List extensions in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) + with open(os.path.join(reportdir, b"extensions-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted extensions by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (ext_deleted_data[extname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) - with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) + with open(os.path.join(reportdir, b"extensions-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All extensions by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) # List files in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n")) + with open(os.path.join(reportdir, b"path-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, path name(s)\n") + f.write(msg.encode()) for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) if when: - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) - with open(os.path.join(reportdir, "path-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n")) + with open(os.path.join(reportdir, b"path-all-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("All paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, pathectory name\n") + f.write(msg.encode()) for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = 
stats['file_deletions'].get(pathname, None) - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) # List of filenames and sizes in descending order - with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")) - f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n")) + with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode()) + f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode()) for sha, size in sorted(stats['packed_size'].items(), key=lambda x:(x[1],x[0]), reverse=True): if sha not in stats['names']: @@ -2541,21 +2553,21 @@ class RepoAnalyze(object): if len(names_with_sha) == 1: names_with_sha = names_with_sha.pop() else: - names_with_sha = sorted(list(names_with_sha)) - f.write(" {} {:10d} {:10d} {}\n".format(sha, - stats['unpacked_size'][sha], - size, - names_with_sha)) + names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']' + f.write(b" %s %10d %10d %s\n" % (sha, + stats['unpacked_size'][sha], + size, + names_with_sha)) @staticmethod def run(args): - git_dir = GitUtils.determine_git_dir('.') + git_dir = GitUtils.determine_git_dir(b'.') # Create the report directory as necessary - results_tmp_dir = os.path.join(git_dir, 'filter-repo') + results_tmp_dir = os.path.join(git_dir, b'filter-repo') if not os.path.isdir(results_tmp_dir): os.mkdir(results_tmp_dir) - reportdir = os.path.join(results_tmp_dir, "analysis") + reportdir = os.path.join(results_tmp_dir, b"analysis") if not args.force and os.path.isdir(reportdir): shutil.rmtree(reportdir) os.mkdir(reportdir) @@ -2693,7 +2705,7 @@ class RepoFilter(object): # Do sanity checks from the correct directory tmp_dir = self.results_tmp_dir(create_if_missing=False) if not self._args.force and \ - not os.path.isfile(os.path.join(tmp_dir, 'already_ran')): + not os.path.isfile(os.path.join(tmp_dir, b'already_ran')): cwd = os.getcwd() os.chdir(target_working_dir) RepoFilter.sanity_check(self._orig_refs, is_bare) @@ -2710,27 +2722,27 @@ class RepoFilter(object): # Make sure repo is fully packed, just like a fresh clone would be output = subprocess.check_output('git count-objects -v'.split()) - stats = dict(x.split(': ') for x in output.splitlines()) - num_packs = int(stats['packs']) - if stats['count'] != '0' or num_packs > 1: + stats = dict(x.split(b': ') for x in output.splitlines()) + num_packs = int(stats[b'packs']) + if stats[b'count'] != b'0' or num_packs > 1: abort(_("expected freshly packed repo")) # Make sure there is precisely one remote, named "origin"...or that this # is a new bare repo with no packs and no remotes output = subprocess.check_output('git remote'.split()).strip() - if not (output == "origin" or (num_packs == 0 and not output)): + if not (output == b"origin" or (num_packs == 0 and not output)): abort(_("expected one remote, origin")) # Avoid letting people running with weird setups and overwriting GIT_DIR # elsewhere - git_dir = GitUtils.determine_git_dir('.') - if is_bare and git_dir != '.': + git_dir = GitUtils.determine_git_dir(b'.') + if is_bare and git_dir != b'.': abort(_("GIT_DIR must be .")) - elif not is_bare and git_dir != '.git': + elif not is_bare 
and git_dir != b'.git': abort(_("GIT_DIR must be .git")) # Make sure that all reflogs have precisely one entry - reflog_dir=os.path.join(git_dir, 'logs') + reflog_dir=os.path.join(git_dir, b'logs') for root, dirs, files in os.walk(reflog_dir): for filename in files: pathname = os.path.join(root, filename) @@ -2741,7 +2753,7 @@ class RepoFilter(object): decode(shortpath)) # Make sure there are no stashed changes - if 'refs/stash' in refs: + if b'refs/stash' in refs: abort(_("has stashed changes")) # Do extra checks in non-bare repos @@ -2756,9 +2768,9 @@ class RepoFilter(object): # Avoid unpushed changes for refname, rev in refs.items(): - if not refname.startswith('refs/heads/'): + if not refname.startswith(b'refs/heads/'): continue - origin_ref = refname.replace('refs/heads/', 'refs/remotes/origin/') + origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/') if origin_ref not in refs: abort(_('%s exists, but %s not found') % (decode(refname), decode(origin_ref))) @@ -2776,13 +2788,13 @@ class RepoFilter(object): def tweak_commit(self, commit): def filename_matches(path_expression, pathname): - if path_expression == '': + if path_expression == b'': return True n = len(path_expression) if (pathname.startswith(path_expression) and - (path_expression[n-1:n] == '/' or + (path_expression[n-1:n] == b'/' or len(pathname) == n or - pathname[n:n+1] == '/')): + pathname[n:n+1] == b'/')): return True return False @@ -2798,7 +2810,7 @@ class RepoFilter(object): if match_type == 'regex' and path_exp.search(pathname): wanted = True elif mod_type == 'rename': - old_exp, new_exp = path_exp.split(':') + old_exp, new_exp = path_exp.split(b':') assert match_type in ('prefix',) if match_type == 'prefix' and pathname.startswith(old_exp): pathname = pathname.replace(old_exp, new_exp, 1) @@ -2866,15 +2878,15 @@ class RepoFilter(object): # in sync with the original with any changes, and then decides # they want to rewrite history to only have one of the two files) colliding_change = new_file_changes[change.filename] - if change.type == 'D': + if change.type == b'D': # We can just throw this one away and keep the other continue - elif change.type == 'M' and ( + elif change.type == b'M' and ( change.mode == colliding_change.mode and change.blob_id == colliding_change.blob_id): # The two are identical, so we can throw this one away and keep other continue - elif new_file_changes[change.filename].type != 'D': + elif new_file_changes[change.filename].type != b'D': raise SystemExit(_("File renaming caused colliding pathnames!\n") + _(" Commit: {}\n").format(commit.original_id) + _(" Filename: {}").format(change.filename)) @@ -2883,8 +2895,8 @@ class RepoFilter(object): @staticmethod def do_tag_rename(rename_pair, tagname): - old, new = rename_pair.split(':', 1) - old, new = 'refs/tags/'+old, 'refs/tags/'+new + old, new = rename_pair.split(b':', 1) + old, new = b'refs/tags/'+old, b'refs/tags/'+new if tagname.startswith(old): return tagname.replace(old, new, 1) return tagname @@ -2895,7 +2907,7 @@ class RepoFilter(object): tag.message = self._message_callback(tag.message) # Tweak the tag name according to callbacks - tag_prefix = 'refs/tags/' + tag_prefix = b'refs/tags/' fullref = tag_prefix+tag.ref if self._args.tag_rename: fullref = RepoFilter.do_tag_rename(self._args.tag_rename, fullref) @@ -2923,9 +2935,9 @@ class RepoFilter(object): reset.ref = self._refname_callback(reset.ref) def results_tmp_dir(self, create_if_missing=True): - working_dir = self._args.target or self._args.source or '.' 
+ working_dir = self._args.target or self._args.source or b'.' git_dir = GitUtils.determine_git_dir(working_dir) - d = os.path.join(git_dir, 'filter-repo') + d = os.path.join(git_dir, b'filter-repo') if create_if_missing and not os.path.isdir(d): os.mkdir(d) return d @@ -2970,7 +2982,7 @@ class RepoFilter(object): self._input = self._fep.stdout if self._args.dry_run or self._args.debug: self._fe_orig = os.path.join(self.results_tmp_dir(), - 'fast-export.original') + b'fast-export.original') output = open(self._fe_orig, 'bw') self._input = InputFileBackup(self._input, output) if self._args.debug: @@ -2989,7 +3001,7 @@ class RepoFilter(object): self._import_pipes = (self._fip.stdin, self._fip.stdout) if self._args.dry_run or self._args.debug: self._fe_filt = os.path.join(self.results_tmp_dir(), - 'fast-export.filtered') + b'fast-export.filtered') self._output = open(self._fe_filt, 'bw') else: self._output = self._fip.stdin @@ -3003,7 +3015,7 @@ class RepoFilter(object): if self._args.dry_run: return refs_to_migrate = set(x for x in self._orig_refs - if x.startswith('refs/remotes/origin/')) + if x.startswith(b'refs/remotes/origin/')) if not refs_to_migrate: return if self._args.debug: @@ -3013,14 +3025,14 @@ class RepoFilter(object): stdin=subprocess.PIPE, cwd=target_working_dir) for ref in refs_to_migrate: - if ref == 'refs/remotes/origin/HEAD': - p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref])) + if ref == b'refs/remotes/origin/HEAD': + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) del self._orig_refs[ref] continue - newref = ref.replace('refs/remotes/origin/', 'refs/heads/') + newref = ref.replace(b'refs/remotes/origin/', b'refs/heads/') if newref not in self._orig_refs: - p.stdin.write('create {} {}\n'.format(newref, self._orig_refs[ref])) - p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref])) + p.stdin.write(b'create %s %s\n' % (newref, self._orig_refs[ref])) + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) self._orig_refs[newref] = self._orig_refs[ref] del self._orig_refs[ref] p.stdin.close() @@ -3115,11 +3127,11 @@ class RepoFilter(object): if refs_to_nuke: if self._args.debug: print("[DEBUG] Deleting the following refs:\n "+ - decode("\n ".join(refs_to_nuke))) + decode(b"\n ".join(refs_to_nuke))) p = subprocess.Popen('git update-ref --stdin'.split(), stdin=subprocess.PIPE, cwd=target_working_dir) - p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x) + p.stdin.write(b''.join([b"option no-deref\ndelete %s\n" % x for x in refs_to_nuke])) p.stdin.close() if p.wait(): diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh index 52221d1..8a674ab 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -450,15 +450,15 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' ' head -n 9 README >actual && test_cmp expect actual && - cat | tr Q "\047" >expect <<-\EOF && + cat >expect <<-\EOF && === Files by sha and associated pathnames in reverse size === Format: sha, unpacked size, packed size, filename(s) object stored as a89c82a2d4b713a125a4323d25adda062cc0013d 44 48 numbers/medium.num f00c965d8307308469e537302baa73048488f162 21 37 numbers/small.num 2aa69a2a708eed00cb390e30f6bcc3eed773f390 20 36 whatever - 51b95456de9274c9a95f756742808dfd480b9b35 13 29 [QcapriciousQ, QfickleQ, QmercurialQ] - 732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [Qsequence/knowQ, Qwords/knowQ] - 34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [Qsequence/toQ, Qwords/toQ] + 51b95456de9274c9a95f756742808dfd480b9b35 13 29 [capricious, 
fickle, mercurial]
+ 732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [sequence/know, words/know]
+ 34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [sequence/to, words/to]
  7ecb56eb3fa3fa6f19dd48bca9f971950b119ede 3 18 words/know
 	EOF
 	test_cmp expect blob-shas-and-paths.txt &&
@@ -795,7 +795,7 @@ test_expect_success 'incremental import' '
 		original=$(git rev-parse master) &&
 		git fast-export --reference-excluded-parents master~2..master \
-			| git filter-repo --stdin --refname-callback "return \"develop\"" &&
+			| git filter-repo --stdin --refname-callback "return b\"develop\"" &&
 		test "$(git rev-parse develop)" = "$original"
 	)
 '
diff --git a/t/t9391/commit_info.py b/t/t9391/commit_info.py
index a0d34f3..01fd725 100755
--- a/t/t9391/commit_info.py
+++ b/t/t9391/commit_info.py
@@ -13,12 +13,12 @@ import git_filter_repo as fr
 
 def change_up_them_commits(commit):
   # Change the commit author
-  if commit.author_name == "Copy N. Paste":
-    commit.author_name = "Ima L. Oser"
-    commit.author_email = "aloser@my.corp"
+  if commit.author_name == b"Copy N. Paste":
+    commit.author_name = b"Ima L. Oser"
+    commit.author_email = b"aloser@my.corp"
 
   # Fix the author email
-  commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email)
+  commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email)
 
   # Fix the committer date (bad timezone conversion in initial import)
   oldtime = fr.string_to_date(commit.committer_date)
@@ -26,7 +26,7 @@ def change_up_them_commits(commit):
   commit.committer_date = fr.date_to_string(newtime)
 
   # Fix the commit message
-  commit.message = re.sub("Marketing is staffed with pansies", "",
+  commit.message = re.sub(b"Marketing is staffed with pansies", b"",
                           commit.message)
 
 args = fr.FilteringOptions.parse_args(['--force'])
diff --git a/t/t9391/create_fast_export_output.py b/t/t9391/create_fast_export_output.py
index e2ef13c..1eb0a3d 100755
--- a/t/t9391/create_fast_export_output.py
+++ b/t/t9391/create_fast_export_output.py
@@ -23,82 +23,82 @@ out.importer_only()
 output = out._output
 
-world = Blob("Hello")
+world = Blob(b"Hello")
 world.dump(output)
 
-bar = Blob("foo\n")
+bar = Blob(b"foo\n")
 bar.dump(output)
 
-master = Reset("refs/heads/master")
+master = Reset(b"refs/heads/master")
 master.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('M', 'bar', bar.id, mode="100644")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'M', b'bar', bar.id, mode=b"100644")]
 when = datetime(year=2005, month=4, day=7, hour=15, minute=16, second=10,
-                tzinfo=FixedTimeZone("-0700"))
+                tzinfo=FixedTimeZone(b"-0700"))
 when_string = fr.date_to_string(when)
-commit1 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "My first commit! Wooot!\n\nLonger description",
+commit1 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"My first commit! Wooot!\n\nLonger description",
                  changes,
                  parents = [])
 commit1.dump(output)
 
-world = Blob("Hello\nHi")
+world = Blob(b"Hello\nHi")
 world.dump(output)
-world_link = Blob("world")
+world_link = Blob(b"world")
 world_link.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('M', 'planet', world_link.id, mode="120000")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
 when += timedelta(days=3, hours=4, minutes=6)
 when_string = fr.date_to_string(when)
-commit2 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Make a symlink to world called planet, modify world",
+commit2 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Make a symlink to world called planet, modify world",
                  changes,
                  parents = [commit1.id])
 commit2.dump(output)
 
-script = Blob("#!/bin/sh\n\necho Hello")
+script = Blob(b"#!/bin/sh\n\necho Hello")
 script.dump(output)
-changes = [FileChanges('M', 'runme', script.id, mode="100755"),
-           FileChanges('D', 'bar')]
-when_string = "1234567890 -0700"
-commit3 = Commit("refs/heads/master",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Add runme script, remove bar",
+changes = [FileChanges(b'M', b'runme', script.id, mode=b"100755"),
+           FileChanges(b'D', b'bar')]
+when_string = b"1234567890 -0700"
+commit3 = Commit(b"refs/heads/master",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Add runme script, remove bar",
                  changes,
                  parents = [commit2.id])
 commit3.dump(output)
 
-progress = Progress("Done with the master branch now...")
+progress = Progress(b"Done with the master branch now...")
 progress.dump(output)
 checkpoint = Checkpoint()
 checkpoint.dump(output)
 
-devel = Reset("refs/heads/devel", commit1.id)
+devel = Reset(b"refs/heads/devel", commit1.id)
 devel.dump(output)
 
-world = Blob("Hello\nGoodbye")
+world = Blob(b"Hello\nGoodbye")
 world.dump(output)
 
-changes = [FileChanges('M', 'world', world.id, mode="100644")]
-when = datetime(2006, 8, 17, tzinfo=FixedTimeZone("+0200"))
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644")]
+when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200"))
 when_string = fr.date_to_string(when)
-commit4 = Commit("refs/heads/devel",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Modify world",
+commit4 = Commit(b"refs/heads/devel",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Modify world",
                  changes,
                  parents = [commit1.id])
 commit4.dump(output)
 
-world = Blob("Hello\nHi\nGoodbye")
+world = Blob(b"Hello\nHi\nGoodbye")
 world.dump(output)
 when = fr.string_to_date(commit3.author_date) + timedelta(days=47)
 when_string = fr.date_to_string(when)
@@ -106,22 +106,22 @@ when_string = fr.date_to_string(when)
 # to the first parent. Thus, despite the fact that runme and planet have
 # not changed and bar was not modified in the devel side, we have to list them
 # all anyway.
-changes = [FileChanges('M', 'world', world.id, mode="100644"),
-           FileChanges('D', 'bar'),
-           FileChanges('M', 'runme', script.id, mode="100755"),
-           FileChanges('M', 'planet', world_link.id, mode="120000")]
+changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
+           FileChanges(b'D', b'bar'),
+           FileChanges(b'M', b'runme', script.id, mode=b"100755"),
+           FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
 
-commit5 = Commit("refs/heads/devel",
-                 "A U Thor", "au@thor.email", when_string,
-                 "Com M. Iter", "comm@iter.email", when_string,
-                 "Merge branch 'master'\n",
+commit5 = Commit(b"refs/heads/devel",
+                 b"A U Thor", b"au@thor.email", when_string,
+                 b"Com M. Iter", b"comm@iter.email", when_string,
+                 b"Merge branch 'master'\n",
                  changes,
                  parents = [commit4.id, commit3.id])
 commit5.dump(output)
 
-mytag = Tag("refs/tags/v1.0", commit5.id,
-            "His R. Highness", "royalty@my.kingdom", when_string,
-            "I bequeath to my peons this royal software")
+mytag = Tag(b"refs/tags/v1.0", commit5.id,
+            b"His R. Highness", b"royalty@my.kingdom", when_string,
+            b"I bequeath to my peons this royal software")
 mytag.dump(output)
 
 out.finish()
diff --git a/t/t9391/file_filter.py b/t/t9391/file_filter.py
index 8540b7d..c3683fc 100755
--- a/t/t9391/file_filter.py
+++ b/t/t9391/file_filter.py
@@ -15,14 +15,14 @@ import sys
 import git_filter_repo as fr
 
 def drop_file_by_contents(blob):
-  bad_file_contents = 'The launch code is 1-2-3-4.'
+  bad_file_contents = b'The launch code is 1-2-3-4.'
   if blob.data == bad_file_contents:
     blob.skip()
 
 def drop_files_by_name(commit):
   new_file_changes = []
   for change in commit.file_changes:
-    if not change.filename.endswith('.doc'):
+    if not change.filename.endswith(b'.doc'):
       new_file_changes.append(change)
   commit.file_changes = new_file_changes
diff --git a/t/t9391/rename-master-to-develop.py b/t/t9391/rename-master-to-develop.py
index 7a922d0..1acfef8 100755
--- a/t/t9391/rename-master-to-develop.py
+++ b/t/t9391/rename-master-to-develop.py
@@ -14,8 +14,8 @@ not try to handle any such special cases.
 import git_filter_repo as fr
 
 def my_commit_callback(commit):
-  if commit.branch == "refs/heads/master":
-    commit.branch = "refs/heads/develop"
+  if commit.branch == b"refs/heads/master":
+    commit.branch = b"refs/heads/develop"
 
 args = fr.FilteringOptions.default_options()
 args.force = True
diff --git a/t/t9391/splice_repos.py b/t/t9391/splice_repos.py
index 133044e..5993436 100755
--- a/t/t9391/splice_repos.py
+++ b/t/t9391/splice_repos.py
@@ -29,11 +29,11 @@ class InterleaveRepositories:
 
   def hold_commit(self, commit):
     commit.skip(new_id = commit.id)
-    letter = re.match('Commit (.)', commit.message).group(1)
+    letter = re.match(b'Commit (.)', commit.message).group(1)
     self.commit_map[letter] = commit
 
   def weave_commit(self, commit):
-    letter = re.match('Commit (.)', commit.message).group(1)
+    letter = re.match(b'Commit (.)', commit.message).group(1)
     prev_letter = bytes([ord(letter)-1])
 
     # Splice in any extra commits needed
@@ -53,10 +53,10 @@ class InterleaveRepositories:
     fr.record_id_rename(new_commit.id, commit.id)
 
   def run(self):
-    blob = fr.Blob('public gpg key contents')
-    tag = fr.Tag('gpg-pubkey', blob.id,
-                 'Ima Tagger', 'ima@tagg.er', '1136199845 +0300',
-                 'Very important explanation and stuff')
+    blob = fr.Blob(b'public gpg key contents')
+    tag = fr.Tag(b'gpg-pubkey', blob.id,
+                 b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300',
+                 b'Very important explanation and stuff')
 
     args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
     out = fr.RepoFilter(args)
diff --git a/t/t9391/strip-cvs-keywords.py b/t/t9391/strip-cvs-keywords.py
index ccd3c8d..ae7cda0 100755
--- a/t/t9391/strip-cvs-keywords.py
+++ b/t/t9391/strip-cvs-keywords.py
@@ -18,8 +18,8 @@ def strip_cvs_keywords(blob):
   # FIXME: Should first check if blob is a text file to avoid ruining
   # binaries. Could use python.magic here, or just output blob.data to
   # the unix 'file' command
-  pattern = r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
-  replacement = r'$\1$'
+  pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
+  replacement = br'$\1$'
   blob.data = re.sub(pattern, replacement, blob.data)
 
 args = fr.FilteringOptions.parse_args(['--force'])
diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py
index da0cf89..190f82b 100755
--- a/t/t9391/unusual.py
+++ b/t/t9391/unusual.py
@@ -21,7 +21,7 @@ import textwrap
 import git_filter_repo as fr
 
 def handle_progress(progress):
-  print("Decipher this: "+bytes(reversed(progress.message)))
+  print(b"Decipher this: "+bytes(reversed(progress.message)))
 
 def handle_checkpoint(checkpoint_object):
   # Flip a coin; see if we want to pass the checkpoint through.
@@ -44,8 +44,8 @@ def track_everything(obj):
   # projects, I'm just verifying an invariant of the current code.
   assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]
 
-mystr = 'This is the contents of the blob'
-compare = "Blob:\n blob\n mark :1\n data {}\n {}".format(len(mystr), mystr)
+mystr = b'This is the contents of the blob'
+compare = b"Blob:\n blob\n mark :1\n data %d\n %s" % (len(mystr), mystr)
 # Next line's only purpose is testing code coverage of something that helps
 # debugging git-filter-repo; it is NOT something external folks should depend
 # upon.
@@ -102,14 +102,14 @@ stream = io.BytesIO(textwrap.dedent('''
   from :3
   M 100644 :1 salutation
 
-  '''[1:]))
+  '''[1:]).encode())
 
 counts = collections.Counter()
 def look_for_reset(obj):
   print("Processing {}".format(obj))
   counts[type(obj)] += 1
   if type(obj) == fr.Reset:
-    assert obj.ref == 'refs/heads/B'
+    assert obj.ref == b'refs/heads/B'
 
 # Use all kinds of internals that external scripts should NOT use and which
 # are likely to break in the future, just to verify a few invariants...
diff --git a/t/t9392-python-callback.sh b/t/t9392-python-callback.sh
index 983879e..27c338c 100755
--- a/t/t9392-python-callback.sh
+++ b/t/t9392-python-callback.sh
@@ -51,7 +51,7 @@ test_expect_success '--filename-callback' '
 	setup filename-callback &&
 	(
 		cd filename-callback &&
-		git filter-repo --filename-callback "return None if filename.endswith(\".doc\") else \"src/\"+filename" &&
+		git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" &&
 		git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
 		! grep file.doc f &&
 		COMPARE=$(wc -l log-messages &&
 		grep TLDR:...... log-messages >modified-messages &&
 		test_line_count = 6 modified-messages
@@ -75,7 +75,7 @@ test_expect_success '--name-callback' '
 	setup name-callback &&
 	(
 		cd name-callback &&
-		git filter-repo --name-callback "return name.replace(\"N.\", \"And\")" &&
+		git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" &&
 		git log --format=%an >log-person-names &&
 		grep Copy.And.Paste log-person-names
 	)
@@ -85,7 +85,7 @@ test_expect_success '--email-callback' '
 	setup email-callback &&
 	(
 		cd email-callback &&
-		git filter-repo --email-callback "return email.replace(\".com\", \".org\")" &&
+		git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" &&
 		git log --format=%ae%n%ce >log-emails &&
 		! grep .com log-emails &&
 		grep .org log-emails
@@ -98,7 +98,7 @@ test_expect_success '--refname-callback' '
 		cd refname-callback &&
 		git filter-repo --refname-callback "
 		  dir,path = os.path.split(refname)
-		  return dir+\"/prefix-\"+path" &&
+		  return dir+b\"/prefix-\"+path" &&
 		git show-ref | grep refs/heads/prefix-master &&
 		git show-ref | grep refs/tags/prefix-v1.0 &&
 		git show-ref | grep refs/tags/prefix-v2.0
@@ -110,7 +110,7 @@ test_expect_success '--refname-callback sanity check' '
 	(
 		cd refname-sanity-check &&
-		test_must_fail git filter-repo --refname-callback "return re.sub(\"tags\", \"other-tags\", refname)" 2>../err &&
+		test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err &&
 		test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err &&
 		rm ../err
 	)
@@ -138,7 +138,7 @@ test_expect_success '--commit-callback' '
 		  commit.committer_email = commit.author_email
 		  commit.committer_date = commit.author_date
 		  for change in commit.file_changes:
-		    change.mode = \"100755\"
+		    change.mode = b\"100755\"
 		  " &&
 		git log --format=%ae%n%ce >log-emails &&
 		! grep committer@example.com log-emails &&
@@ -153,8 +153,8 @@ test_expect_success '--tag-callback' '
 	(
 		cd tag-callback &&
 		git filter-repo --tag-callback "
-		  tag.tagger_name = \"Dr. \"+tag.tagger_name
-		  tag.message = \"Awesome sauce \"+tag.message
+		  tag.tagger_name = b\"Dr. \"+tag.tagger_name
+		  tag.message = b\"Awesome sauce \"+tag.message
 		  " &&
 		git cat-file -p v2.0 | grep ^tagger.Dr\\. &&
 		git cat-file -p v2.0 | grep ^Awesome.sauce.Super
@@ -175,7 +175,7 @@ test_expect_success 'callback has return statement sanity check' '
 	(
 		cd callback_return_sanity &&
-		test_must_fail git filter-repo --filename-callback "filename + \".txt\"" 2>../err&&
+		test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&&
 		test_i18ngrep "Error: --filename-callback should have a return statement" ../err &&
 		rm ../err
 	)
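
The callback changes above also serve as a reference for out-of-tree filter scripts: every value git-filter-repo hands to a callback is now a bytestring, so literals, regex patterns, and replacements have to be bytes as well. A minimal, hypothetical commit-callback script following that convention might look like the sketch below; the bug-id pattern and message prefix are invented for illustration and are not part of this patch.

#!/usr/bin/env python3
# Hypothetical sketch of a bytestring-based callback script; not part of this patch.
import re
import git_filter_repo as fr

def scrub_message(commit):
  # commit.message is bytes, so the pattern and the replacement must be bytes too
  commit.message = re.sub(br'Internal-Bug-Id: \d+\n?', b'', commit.message)
  # bytestrings have no .format(); interpolate with the '%' operator instead
  commit.message = b'[imported] %s' % commit.message

args = fr.FilteringOptions.parse_args(['--force'])
fr.RepoFilter(args, commit_callback=scrub_message).run()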