From e5955f397f53f5f12ec0881096287eea27ee8573 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 1 Apr 2019 14:35:49 -0700 Subject: [PATCH 01/17] filter-repo (python3): shebang and imports Signed-off-by: Elijah Newren --- git-filter-repo | 8 +++----- t/t9391-filter-repo-lib-usage.sh | 2 +- t/t9391/commit_info.py | 2 +- t/t9391/create_fast_export_output.py | 2 +- t/t9391/erroneous.py | 2 +- t/t9391/file_filter.py | 2 +- t/t9391/print_progress.py | 6 +++--- t/t9391/rename-master-to-develop.py | 2 +- t/t9391/splice_repos.py | 2 +- t/t9391/strip-cvs-keywords.py | 2 +- t/t9391/unusual.py | 6 +++--- 11 files changed, 17 insertions(+), 19 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 07c769c..6fb102a 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ git-filter-repo filters git repositories, similar to git filter-branch, BFG @@ -30,8 +30,6 @@ operations; however: ***** END API BACKWARD COMPATIBILITY CAVEAT ***** """ -from __future__ import print_function - import argparse import collections import fnmatch @@ -39,7 +37,7 @@ import gettext import os import re import shutil -import StringIO +import io import subprocess import sys import time @@ -408,7 +406,7 @@ class _GitElement(object): Convert GitElement to string; used for debugging """ old_dumped = self.dumped - writeme = StringIO.StringIO() + writeme = io.StringIO() self.dump(writeme) output_lines = writeme.getvalue().splitlines() writeme.close() diff --git a/t/t9391-filter-repo-lib-usage.sh b/t/t9391-filter-repo-lib-usage.sh index e923d29..a967f31 100755 --- a/t/t9391-filter-repo-lib-usage.sh +++ b/t/t9391-filter-repo-lib-usage.sh @@ -158,7 +158,7 @@ test_expect_success 'other error cases' ' mkdir other && cd other && - ! python -c "import git_filter_repo as fr; fr.GitUtils.get_commit_count(\".\", [\"HEAD\"])" 2>err && + ! python3 -c "import git_filter_repo as fr; fr.GitUtils.get_commit_count(\".\", [\"HEAD\"])" 2>err && test_i18ngrep ". does not appear to be a valid git repository" err ) ' diff --git a/t/t9391/commit_info.py b/t/t9391/commit_info.py index e697bd8..a0d34f3 100755 --- a/t/t9391/commit_info.py +++ b/t/t9391/commit_info.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/create_fast_export_output.py b/t/t9391/create_fast_export_output.py index a1b21e0..e2ef13c 100755 --- a/t/t9391/create_fast_export_output.py +++ b/t/t9391/create_fast_export_output.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/erroneous.py b/t/t9391/erroneous.py index a5c05d2..db6051b 100755 --- a/t/t9391/erroneous.py +++ b/t/t9391/erroneous.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/file_filter.py b/t/t9391/file_filter.py index f6a1ae9..8540b7d 100755 --- a/t/t9391/file_filter.py +++ b/t/t9391/file_filter.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/print_progress.py b/t/t9391/print_progress.py index 5256b74..bbca538 100755 --- a/t/t9391/print_progress.py +++ b/t/t9391/print_progress.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the @@ -21,8 +21,8 @@ commit_count = 0 def print_progress(): global object_count, commit_count, total_objects, total_commits - print "\rRewriting commits... %d/%d (%d objects)" \ - % (commit_count, total_commits, object_count), + print("\rRewriting commits... 
%d/%d (%d objects)" + % (commit_count, total_commits, object_count), end='') def my_blob_callback(blob): global object_count diff --git a/t/t9391/rename-master-to-develop.py b/t/t9391/rename-master-to-develop.py index f92517a..7a922d0 100755 --- a/t/t9391/rename-master-to-develop.py +++ b/t/t9391/rename-master-to-develop.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/splice_repos.py b/t/t9391/splice_repos.py index 00d0058..c7834c7 100755 --- a/t/t9391/splice_repos.py +++ b/t/t9391/splice_repos.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/strip-cvs-keywords.py b/t/t9391/strip-cvs-keywords.py index 1067d55..ccd3c8d 100755 --- a/t/t9391/strip-cvs-keywords.py +++ b/t/t9391/strip-cvs-keywords.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Please see the diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index 3167c0f..6a61dbe 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Please: DO NOT USE THIS AS AN EXAMPLE. # @@ -14,7 +14,7 @@ import collections import os import random -import StringIO +import io import sys import textwrap @@ -71,7 +71,7 @@ print("Found {} blobs/commits and {} other objects" .format(total_objects['common'], total_objects['uncommon'])) -stream = StringIO.StringIO(textwrap.dedent(''' +stream = io.StringIO(textwrap.dedent(''' blob mark :1 data 5 From 511a8f52f879846ddb7cbe09b7030ed83da3479d Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 1 Apr 2019 14:36:57 -0700 Subject: [PATCH 02/17] filter-repo (python3): iteritems() -> items() Signed-off-by: Elijah Newren --- git-filter-repo | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 6fb102a..09758a1 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -268,7 +268,7 @@ class MailmapInfo(object): ''' Given a name and email, return the expected new name and email from the mailmap if there is a translation rule for it, otherwise just return the given name and email.''' - for old, new in self.changes.iteritems(): + for old, new in self.changes.items(): old_name, old_email = old new_name, new_email = new if (email == old_email or not old_email) and ( @@ -1585,7 +1585,7 @@ class FastExportFilter(object): def _handle_final_commands(self): self._finalize_handled = True - for ref, value in self._seen_refs.iteritems(): + for ref, value in self._seen_refs.items(): if value is not None: # Create a reset reset = Reset(ref, value) @@ -1604,13 +1604,13 @@ class FastExportFilter(object): self._flush_renames() with open(os.path.join(metadata_dir, 'commit-map'), 'w') as f: f.write("%-40s %s\n" % (_("old"), _("new"))) - for (old,new) in self._commit_renames.iteritems(): + for (old,new) in self._commit_renames.items(): f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) batch_check_process = None batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') with open(os.path.join(metadata_dir, 'ref-map'), 'w') as f: - for refname, old_hash in orig_refs.iteritems(): + for refname, old_hash in orig_refs.items(): if refname in refs_nuked: new_hash = deleted_hash elif old_hash in self._commit_renames: @@ -2412,7 +2412,7 @@ class RepoAnalyze(object): # too. 
with open(os.path.join(reportdir, "renames.txt"), 'w') as f: seen = set() - for pathname,equiv_group in sorted(stats['equivalence'].iteritems(), + for pathname,equiv_group in sorted(stats['equivalence'].items(), key=lambda x:(x[1], x[0])): if equiv_group in seen: continue @@ -2425,7 +2425,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("Deleted directories by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) - for dirname, size in sorted(dir_size['packed'].iteritems(), + for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (dir_deleted_data[dirname]): f.write(" {:10d} {:10d} {:10s} {}\n" @@ -2437,7 +2437,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("All directories by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) - for dirname, size in sorted(dir_size['packed'].iteritems(), + for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): f.write(" {:10d} {:10d} {:10s} {}\n" .format(dir_size['unpacked'][dirname], @@ -2449,7 +2449,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("Deleted extensions by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) - for extname, size in sorted(ext_size['packed'].iteritems(), + for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (ext_deleted_data[extname]): f.write(" {:10d} {:10d} {:10s} {}\n" @@ -2461,7 +2461,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("All extensions by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) - for extname, size in sorted(ext_size['packed'].iteritems(), + for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): f.write(" {:10d} {:10d} {:10s} {}\n" .format(ext_size['unpacked'][extname], @@ -2473,7 +2473,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size")) f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n")) - for pathname, size in sorted(path_size['packed'].iteritems(), + for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) if when: @@ -2486,7 +2486,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "path-all-sizes.txt"), 'w') as f: f.write("=== %s ===\n" % _("All paths by reverse accumulated size")) f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n")) - for pathname, size in sorted(path_size['packed'].iteritems(), + for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) f.write(" {:10d} {:10d} {:10s} {}\n" @@ -2499,7 +2499,7 @@ class RepoAnalyze(object): with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'w') as f: f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")) f.write(_("Format: sha, unpacked 
size, packed size, filename(s) object stored as\n")) - for sha, size in sorted(stats['packed_size'].iteritems(), + for sha, size in sorted(stats['packed_size'].items(), key=lambda x:(x[1],x[0]), reverse=True): if sha not in stats['names']: # Some objects in the repository might not be referenced, or not @@ -2723,7 +2723,7 @@ class RepoFilter(object): abort(_("you have untracked changes")) # Avoid unpushed changes - for refname, rev in refs.iteritems(): + for refname, rev in refs.items(): if not refname.startswith('refs/heads/'): continue origin_ref = refname.replace('refs/heads/', 'refs/remotes/origin/') From 468ef568cf863e03bd9c1b28267eb96d2378d926 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 1 Apr 2019 14:38:02 -0700 Subject: [PATCH 03/17] filter-repo (python3): xrange() -> range() Signed-off-by: Elijah Newren --- git-filter-repo | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 09758a1..197664f 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -126,7 +126,7 @@ class PathQuoting: '"': '"', '\\':'\\'} _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') - _escape = [chr(x) for x in xrange(127)]+['\\'+oct(x)[1:] for x in xrange(127,256)] + _escape = [chr(x) for x in range(127)]+['\\'+oct(x)[1:] for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: _escape[ord(x)] = '\\'+_reverse[x] @@ -1217,10 +1217,10 @@ class FastExportFilter(object): # ancestor of another parent.) num_parents = len(parents) to_remove = [] - for cur in xrange(num_parents): + for cur in range(num_parents): if not is_rewritten[cur]: continue - for other in xrange(num_parents): + for other in range(num_parents): if cur == other: continue if not self._graph.is_ancestor(parents[cur], parents[other]): From 2562f0270c41599eeed3a1fe26f03107596b72af Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 30 Apr 2019 23:46:05 -0700 Subject: [PATCH 04/17] filter-repo (python3): revert "workaround python<2.7.9 exec bug" Commit ca32c5d9afe2 ("filter-repo: workaround python<2.7.9 exec bug", 2019-04-30) put in a workaround for python versions prior to 2.7.9, but which was incompatible with python3. Revert it as one step towards migrating to python3. Signed-off-by: Elijah Newren --- git-filter-repo | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 197664f..8039fdd 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -2619,8 +2619,8 @@ class RepoFilter(object): def _handle_arg_callbacks(self): def make_callback(argname, str): - exec 'def callback({}):\n'.format(argname)+\ - ' '+'\n '.join(str.splitlines()) in globals() + exec('def callback({}):\n'.format(argname)+ + ' '+'\n '.join(str.splitlines()), globals()) return callback #namespace['callback'] def handle(type): callback_field = '_{}_callback'.format(type) From 1a8e247ba72afdf79ff28bbf94693ed379cca2e5 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 26 Apr 2019 17:53:00 -0700 Subject: [PATCH 05/17] filter-repo (python3): add a decode() function We need a function to transform byte strings into unicode strings for printing error messages and occasional other uses. 
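For example (an illustration only, not part of the patch), the 'backslashreplace' error handler used by decode() keeps undecodable bytes visible instead of raising UnicodeDecodeError; note this assumes python 3.5 or later, where 'backslashreplace' applies to decoding and not just encoding:

    >>> b'caf\xc3\xa9'.decode('utf-8', 'backslashreplace')
    'café'
    >>> b'caf\xe9'.decode('utf-8', 'backslashreplace')  # invalid UTF-8 byte
    'caf\\xe9'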
Signed-off-by: Elijah Newren --- git-filter-repo | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/git-filter-repo b/git-filter-repo index 8039fdd..5420acd 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -115,6 +115,10 @@ def date_to_string(dateobj): return('{} {}'.format(int(_timedelta_to_seconds(dateobj - epoch)), dateobj.tzinfo.tzname(0))) +def decode(bytestr): + 'Try to convert bytestr to utf-8 for outputting as an error message.' + return bytestr.decode('utf-8', 'backslashreplace') + class PathQuoting: _unescape = {'a': '\a', 'b': '\b', From ad3c839263be4ff7798ed8ae7cf5cf7f89806138 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 26 Apr 2019 17:59:50 -0700 Subject: [PATCH 06/17] filter-repo (python3): handle conversion of glob to regex python3 forces a couple issues for us with the conversion of globs to regexes: * fnmatch.translate() will ONLY operate on unicode strings, not bytestrings. Super lame. * newer versions of python3 modified the regex style used by fnmatch.translate() causing us to need extra logic to 'fixup' the regex into the form we want. Split the code for translating the glob to a regex out into a separate function which now houses more complicated logic to handle these extra conditions. Signed-off-by: Elijah Newren --- git-filter-repo | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 5420acd..399a921 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -119,6 +119,33 @@ def decode(bytestr): 'Try to convert bytestr to utf-8 for outputting as an error message.' return bytestr.decode('utf-8', 'backslashreplace') +def glob_to_regex(glob_bytestr): + 'Translate glob_bytestr into a regex on bytestrings' + + # fnmatch.translate is idiotic and won't accept bytestrings + if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover + raise SystemExit(_("Error: Cannot handle glob %s").format(glob_bytestr)) + + # Create regex operating on string + regex = fnmatch.translate(decode(glob_bytestr)) + + # FIXME: This is an ugly hack... + # fnmatch.translate tries to do multi-line matching and wants the glob to + # match up to the end of the input, which isn't relevant for us, so we + # have to modify the regex. fnmatch.translate has used different regex + # constructs to achieve this with different python versions, so we have + # to check for each of them and then fix it up. It would be much better + # if fnmatch.translate could just take some flags to allow us to specify + # what we want rather than employing this hackery, but since it + # doesn't... 
+ if regex.endswith(r'\Z(?ms)'): # pragma: no cover + regex = regex[0:-7] + elif regex.startswith(r'(?s:') and regex.endswith(r')\Z'): # pragma: no cover + regex = regex[4:-3] + + # Finally, convert back to regex operating on bytestr + return regex.encode() + class PathQuoting: _unescape = {'a': '\a', 'b': '\b', @@ -2037,9 +2064,7 @@ class FilteringOptions(object): if line.startswith('regex:'): regex = line[6:] elif line.startswith('glob:'): - regex = fnmatch.translate(line[5:]) - if regex.endswith(r'\Z(?ms)'): - regex = regex[0:-7] + regex = glob_to_regex(line[5:]) if regex: replace_regexes.append((re.compile(regex), replacement)) else: From 6e78788feb879379d0fe541a6f5c9fd90dd8d643 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Mon, 1 Apr 2019 14:49:28 -0700 Subject: [PATCH 07/17] filter-repo (python3): more flush()ing needed under python3 Signed-off-by: Elijah Newren --- git-filter-repo | 1 + 1 file changed, 1 insertion(+) diff --git a/git-filter-repo b/git-filter-repo index 399a921..4d2562c 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1655,6 +1655,7 @@ class FastExportFilter(object): stdout=subprocess.PIPE, cwd=self._repo_working_dir) batch_check_process.stdin.write(refname+"\n") + batch_check_process.stdin.flush() line = batch_check_process.stdout.readline() m = batch_check_output_re.match(line) if not m or m.group(2) != 'tag': From effcd5b9ff01cb1a2cfc68d30167c62882fbc424 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Fri, 26 Apr 2019 18:09:21 -0700 Subject: [PATCH 08/17] filter-repo (python3): convert run_coverage Signed-off-by: Elijah Newren --- t/run_coverage | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/run_coverage b/t/run_coverage index 0e2fe74..3abd9af 100755 --- a/t/run_coverage +++ b/t/run_coverage @@ -21,8 +21,8 @@ export PYTHONPATH=$tmpdir: ls t939*.sh | xargs -n 1 bash cd $tmpdir -python-coverage combine -python-coverage html -d $orig_dir/report -python-coverage report -m +python3-coverage combine +python3-coverage html -d $orig_dir/report +python3-coverage report -m cd $orig_dir rm -rf $tmpdir From 8b8d6b4b43720fc731b7e4ce98d82f00a781f76c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 11:32:40 -0700 Subject: [PATCH 09/17] filter-repo (python3): ensure stdin and args are bytes instead of strings Signed-off-by: Elijah Newren --- git-filter-repo | 22 +++++++++++++--------- t/t9391/unusual.py | 4 ++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 4d2562c..3645697 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -1897,16 +1897,17 @@ class FilteringOptions(object): "files matching none of those options.")) path.add_argument('--path-match', '--path', metavar='DIR_OR_FILE', + type=os.fsencode, action=FilteringOptions.AppendFilter, dest='path_changes', help=_("Exact paths (files or directories) to include in filtered " "history. Multiple --path options can be specified to get " "a union of paths.")) - path.add_argument('--path-glob', metavar='GLOB', + path.add_argument('--path-glob', metavar='GLOB', type=os.fsencode, action=FilteringOptions.AppendFilter, dest='path_changes', help=_("Glob of paths to include in filtered history. Multiple " "--path-glob options can be specified to get a union of " "paths.")) - path.add_argument('--path-regex', metavar='REGEX', + path.add_argument('--path-regex', metavar='REGEX', type=os.fsencode, action=FilteringOptions.AppendFilter, dest='path_changes', help=_("Regex of paths to include in filtered history. 
Multiple " "--path-regex options can be specified to get a union of " @@ -1914,31 +1915,32 @@ class FilteringOptions(object): rename = parser.add_argument_group(title=_("Renaming based on paths")) rename.add_argument('--path-rename', '--path-rename-prefix', - metavar='OLD_NAME:NEW_NAME', dest='path_changes', + metavar='OLD_NAME:NEW_NAME', dest='path_changes', type=os.fsencode, action=FilteringOptions.AppendFilter, help=_("Prefix to rename; if filename starts with OLD_NAME, " "replace that with NEW_NAME. Multiple --path-rename " "options can be specified.")) refrename = parser.add_argument_group(title=_("Renaming of refs")) - refrename.add_argument('--tag-rename', metavar='OLD:NEW', + refrename.add_argument('--tag-rename', metavar='OLD:NEW', type=os.fsencode, help=_("Rename tags starting with OLD to start with NEW. For " "example, --tag-rename foo:bar will rename tag foo-1.2.3 " "to bar-1.2.3; either OLD or NEW can be empty.")) helpers = parser.add_argument_group(title=_("Shortcuts")) helpers.add_argument('--subdirectory-filter', metavar='DIRECTORY', - action=FilteringOptions.HelperFilter, + action=FilteringOptions.HelperFilter, type=os.fsencode, help=_("Only look at history that touches the given subdirectory " "and treat that directory as the project root. Equivalent " "to using '--path DIRECTORY/ --path-rename DIRECTORY/:'")) helpers.add_argument('--to-subdirectory-filter', metavar='DIRECTORY', - action=FilteringOptions.HelperFilter, + action=FilteringOptions.HelperFilter, type=os.fsencode, help=_("Treat the project root as instead being under DIRECTORY. " "Equivalent to using '--path-rename :DIRECTORY/'")) people = parser.add_argument_group(title=_("Filtering of names/emails")) people.add_argument('--mailmap', dest='mailmap', metavar='FILENAME', + type=os.fsencode, help=_("Use specified mailmap file (see git-shortlog(1) for " "details on the format) when rewriting author, committer, " "and tagger names and emails. If the specified file is " @@ -1988,8 +1990,9 @@ class FilteringOptions(object): "CALLBACKS section below.")) location = parser.add_argument_group(title=_("Location to filter from/to")) - location.add_argument('--source', help=_("Git repository to read from")) - location.add_argument('--target', + location.add_argument('--source', type=os.fsencode, + help=_("Git repository to read from")) + location.add_argument('--target', type=os.fsencode, help=_("Git repository to overwrite with filtered history")) misc = parser.add_argument_group(title=_("Miscellaneous options")) @@ -2947,7 +2950,8 @@ class RepoFilter(object): def _setup_input(self, use_done_feature): if self._args.stdin: - self._input = sys.stdin + self._input = sys.stdin.detach() + sys.stdin = None # Make sure no one tries to accidentally use it self._fe_orig = None else: skip_blobs = (self._blob_callback is None and diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index 6a61dbe..684c105 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -58,8 +58,8 @@ filter = fr.FastExportFilter('.', checkpoint_callback = handle_checkpoint, everything_callback = track_everything) -filter.run(input = sys.stdin, - output = open(os.devnull, 'w'), +filter.run(input = sys.stdin.detach(), + output = open(os.devnull, 'wb'), fast_import_pipes = None, quiet = True) # DO NOT depend upon or use _IDS directly you external script writers. 
I'm From 9b3134b68ce94488e8addc93a1830be48a47c82e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 11:47:12 -0700 Subject: [PATCH 10/17] filter-repo (python3): ensure file reads and writes are done in bytes Signed-off-by: Elijah Newren --- git-filter-repo | 36 ++++++++++++++++++------------------ t/t9391/unusual.py | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 3645697..e006c5a 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -266,7 +266,7 @@ class MailmapInfo(object): comment_re = re.compile(r'\s*#.*') if not os.access(filename, os.R_OK): raise SystemExit(_("Cannot read %s") % filename) - with open(filename) as f: + with open(filename, 'br') as f: count = 0 for line in f: count += 1 @@ -1633,14 +1633,14 @@ class FastExportFilter(object): def record_metadata(self, metadata_dir, orig_refs, refs_nuked): deleted_hash = '0'*40 self._flush_renames() - with open(os.path.join(metadata_dir, 'commit-map'), 'w') as f: + with open(os.path.join(metadata_dir, 'commit-map'), 'bw') as f: f.write("%-40s %s\n" % (_("old"), _("new"))) for (old,new) in self._commit_renames.items(): f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) batch_check_process = None batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') - with open(os.path.join(metadata_dir, 'ref-map'), 'w') as f: + with open(os.path.join(metadata_dir, 'ref-map'), 'bw') as f: for refname, old_hash in orig_refs.items(): if refname in refs_nuked: new_hash = deleted_hash @@ -1669,7 +1669,7 @@ class FastExportFilter(object): batch_check_process.stdin.close() batch_check_process.wait() - with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'w') as f: + with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'bw') as f: issues_found = False if self._commits_no_longer_merges: issues_found = True @@ -1698,7 +1698,7 @@ class FastExportFilter(object): if not issues_found: f.write(_("No filtering problems encountered.")) - with open(os.path.join(metadata_dir, 'already_ran'), 'w') as f: + with open(os.path.join(metadata_dir, 'already_ran'), 'bw') as f: f.write(_("This file exists to allow you to filter again without --force.")) def get_seen_refs(self): @@ -2054,7 +2054,7 @@ class FilteringOptions(object): def get_replace_text(filename): replace_literals = [] replace_regexes = [] - with open(filename) as f: + with open(filename, 'br') as f: for line in f: line = line.rstrip('\r\n') @@ -2353,7 +2353,7 @@ class RepoAnalyze(object): for name in dir_size['packed']: dir_deleted_data[name] = stats['tree_deletions'].get(name, None) - with open(os.path.join(reportdir, "README"), 'w') as f: + with open(os.path.join(reportdir, "README"), 'bw') as f: # Give a basic overview of this file f.write("== %s ==\n" % _("Overall Statistics")) f.write(" %s: %d\n" % (_("Number of commits"), @@ -2443,7 +2443,7 @@ class RepoAnalyze(object): # Equivalence classes for names, so if folks only want to keep a # certain set of paths, they know the old names they want to include # too. 
- with open(os.path.join(reportdir, "renames.txt"), 'w') as f: + with open(os.path.join(reportdir, "renames.txt"), 'bw') as f: seen = set() for pathname,equiv_group in sorted(stats['equivalence'].items(), key=lambda x:(x[1], x[0])): @@ -2455,7 +2455,7 @@ class RepoAnalyze(object): "\n") # List directories in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("Deleted directories by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) for dirname, size in sorted(dir_size['packed'].items(), @@ -2467,7 +2467,7 @@ class RepoAnalyze(object): datestr(dir_deleted_data[dirname]), dirname or _(''))) - with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("All directories by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) for dirname, size in sorted(dir_size['packed'].items(), @@ -2479,7 +2479,7 @@ class RepoAnalyze(object): dirname or _(""))) # List extensions in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("Deleted extensions by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) for extname, size in sorted(ext_size['packed'].items(), @@ -2491,7 +2491,7 @@ class RepoAnalyze(object): datestr(ext_deleted_data[extname]), extname or _(''))) - with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("All extensions by reverse size")) f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) for extname, size in sorted(ext_size['packed'].items(), @@ -2503,7 +2503,7 @@ class RepoAnalyze(object): extname or _(''))) # List files in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size")) f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n")) for pathname, size in sorted(path_size['packed'].items(), @@ -2516,7 +2516,7 @@ class RepoAnalyze(object): datestr(when), pathname)) - with open(os.path.join(reportdir, "path-all-sizes.txt"), 'w') as f: + with open(os.path.join(reportdir, "path-all-sizes.txt"), 'bw') as f: f.write("=== %s ===\n" % _("All paths by reverse accumulated size")) f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n")) for pathname, size in sorted(path_size['packed'].items(), @@ -2529,7 +2529,7 @@ class RepoAnalyze(object): pathname)) # List of filenames and sizes in descending order - with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'w') as f: + with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'bw') as f: f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")) f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n")) for sha, size in sorted(stats['packed_size'].items(), @@ -2735,7 +2735,7 @@ class 
RepoFilter(object): for root, dirs, files in os.walk(reflog_dir): for filename in files: pathname = os.path.join(root, filename) - with open(pathname) as f: + with open(pathname, 'br') as f: if len(f.read().splitlines()) > 1: shortpath = pathname[len(reflog_dir)+1:] abort(_("expected at most one entry in the reflog for %s") % @@ -2970,7 +2970,7 @@ class RepoFilter(object): if self._args.dry_run or self._args.debug: self._fe_orig = os.path.join(self.results_tmp_dir(), 'fast-export.original') - output = open(self._fe_orig, 'w') + output = open(self._fe_orig, 'bw') self._input = InputFileBackup(self._input, output) if self._args.debug: print("[DEBUG] Running: {}".format(' '.join(fep_cmd))) @@ -2988,7 +2988,7 @@ class RepoFilter(object): if self._args.dry_run or self._args.debug: self._fe_filt = os.path.join(self.results_tmp_dir(), 'fast-export.filtered') - self._output = open(self._fe_filt, 'w') + self._output = open(self._fe_filt, 'bw') else: self._output = self._fip.stdin if self._args.debug: diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index 684c105..6817c65 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -59,7 +59,7 @@ filter = fr.FastExportFilter('.', everything_callback = track_everything) filter.run(input = sys.stdin.detach(), - output = open(os.devnull, 'wb'), + output = open(os.devnull, 'bw'), fast_import_pipes = None, quiet = True) # DO NOT depend upon or use _IDS directly you external script writers. I'm From 0279e3882d07bba8cba7a81875c6b898fdaedf83 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 12:05:42 -0700 Subject: [PATCH 11/17] filter-repo (python3): error messages should be strings instead of bytes Signed-off-by: Elijah Newren --- git-filter-repo | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index e006c5a..60ff51f 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -265,7 +265,7 @@ class MailmapInfo(object): name_and_email_re = re.compile(r'(.*?)\s*<([^>]+)>\s*') comment_re = re.compile(r'\s*#.*') if not os.access(filename, os.R_OK): - raise SystemExit(_("Cannot read %s") % filename) + raise SystemExit(_("Cannot read %s") % decode(filename)) with open(filename, 'br') as f: count = 0 for line in f: @@ -2450,8 +2450,8 @@ class RepoAnalyze(object): if equiv_group in seen: continue seen.add(equiv_group) - f.write("{} ->\n ".format(equiv_group[0]) + - "\n ".join(equiv_group[1:]) + + f.write("{} ->\n ".format(decode(equiv_group[0])) + + "\n ".join(decode(x) for x in equiv_group[1:]) + "\n") # List directories in reverse sorted order of unpacked size @@ -2565,7 +2565,7 @@ class RepoAnalyze(object): stats = RepoAnalyze.gather_data(args) # Write the reports - sys.stdout.write(_("Writing reports to %s...") % reportdir) + sys.stdout.write(_("Writing reports to %s...") % decode(reportdir)) sys.stdout.flush() RepoAnalyze.write_report(reportdir, stats) sys.stdout.write(_("done.\n")) @@ -2739,7 +2739,7 @@ class RepoFilter(object): if len(f.read().splitlines()) > 1: shortpath = pathname[len(reflog_dir)+1:] abort(_("expected at most one entry in the reflog for %s") % - shortpath) + decode(shortpath)) # Make sure there are no stashed changes if 'refs/stash' in refs: @@ -2761,9 +2761,11 @@ class RepoFilter(object): continue origin_ref = refname.replace('refs/heads/', 'refs/remotes/origin/') if origin_ref not in refs: - abort(_('%s exists, but %s not found') % (refname, origin_ref)) + abort(_('%s exists, but %s not found') % (decode(refname), + decode(origin_ref))) if 
rev != refs[origin_ref]: - abort(_('%s does not match %s') % (refname, origin_ref)) + abort(_('%s does not match %s') % (decode(refname), + decode(origin_ref))) @staticmethod def tweak_blob(args, blob): @@ -2974,7 +2976,8 @@ class RepoFilter(object): self._input = InputFileBackup(self._input, output) if self._args.debug: print("[DEBUG] Running: {}".format(' '.join(fep_cmd))) - print(" (saving a copy of the output at {})".format(self._fe_orig)) + print(" (saving a copy of the output at {})" + .format(decode(self._fe_orig))) def _setup_output(self): if not self._args.dry_run: @@ -2994,7 +2997,8 @@ class RepoFilter(object): if self._args.debug: self._output = DualFileWriter(self._fip.stdin, self._output) print("[DEBUG] Running: {}".format(' '.join(fip_cmd))) - print(" (using the following file as input: {})".format(self._fe_filt)) + print(" (using the following file as input: {})" + .format(decode(self._fe_filt))) def _migrate_origin_to_heads(self): if self._args.dry_run: @@ -3099,10 +3103,10 @@ class RepoFilter(object): print(_("NOTE: Not running fast-import or cleaning up; --dry-run passed.")) if self._fe_orig: print(_(" Requested filtering can be seen by comparing:")) - print(" " + self._fe_orig) + print(" " + decode(self._fe_orig)) else: print(_(" Requested filtering can be seen at:")) - print(" " + self._fe_filt) + print(" " + decode(self._fe_filt)) return target_working_dir = self._args.target or '.' @@ -3112,7 +3116,7 @@ class RepoFilter(object): if refs_to_nuke: if self._args.debug: print("[DEBUG] Deleting the following refs:\n "+ - "\n ".join(refs_to_nuke)) + decode("\n ".join(refs_to_nuke))) p = subprocess.Popen('git update-ref --stdin'.split(), stdin=subprocess.PIPE, cwd=target_working_dir) From c3072c7f013c409662b6f41e99868d4dd3cc97f8 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 14:10:47 -0700 Subject: [PATCH 12/17] filter-repo (python3): convert StringIO->BytesIO and __str__->__bytes__ Signed-off-by: Elijah Newren --- git-filter-repo | 4 ++-- t/t9391/unusual.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 60ff51f..a76b90f 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -432,12 +432,12 @@ class _GitElement(object): raise SystemExit(_("Unimplemented function: %s") % type(self).__name__ +".dump()") # pragma: no cover - def __str__(self): + def __bytes__(self): """ Convert GitElement to string; used for debugging """ old_dumped = self.dumped - writeme = io.StringIO() + writeme = io.BytesIO() self.dump(writeme) output_lines = writeme.getvalue().splitlines() writeme.close() diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index 6817c65..c7a2c57 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -50,7 +50,7 @@ compare = "Blob:\n blob\n mark :1\n data {}\n {}".format(len(mystr), mystr) # debugging git-filter-repo; it is NOT something external folks should depend # upon. myblob = fr.Blob(mystr) -assert str(myblob) == compare +assert bytes(myblob) == compare # Everyone should be using RepoFilter objects, not FastExportFilter. But for # testing purposes... 
filter = fr.FastExportFilter('.', @@ -71,7 +71,7 @@ print("Found {} blobs/commits and {} other objects" .format(total_objects['common'], total_objects['uncommon'])) -stream = io.StringIO(textwrap.dedent(''' +stream = io.BytesIO(textwrap.dedent(''' blob mark :1 data 5 From ca5818056d20de08db7191eda657f3eff35931e3 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 14:16:31 -0700 Subject: [PATCH 13/17] filter-repo (python3): oct strings in python3 use "0o" instead of "0" Signed-off-by: Elijah Newren --- git-filter-repo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-filter-repo b/git-filter-repo index a76b90f..bd4682a 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -157,7 +157,7 @@ class PathQuoting: '"': '"', '\\':'\\'} _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') - _escape = [chr(x) for x in range(127)]+['\\'+oct(x)[1:] for x in range(127,256)] + _escape = [chr(x) for x in range(127)]+['\\'+oct(x)[2:] for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: _escape[ord(x)] = '\\'+_reverse[x] From 4c05cbe07201728a180c210fd3ec50d98d05fe95 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 14:34:59 -0700 Subject: [PATCH 14/17] filter-repo (python3): bytes() instead of chr() or string join Signed-off-by: Elijah Newren --- git-filter-repo | 5 +++-- t/t9391/splice_repos.py | 2 +- t/t9391/unusual.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index bd4682a..b694e8b 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -157,7 +157,8 @@ class PathQuoting: '"': '"', '\\':'\\'} _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') - _escape = [chr(x) for x in range(127)]+['\\'+oct(x)[2:] for x in range(127,256)] + _escape = [bytes([x]) for x in range(127)]+[ + '\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: _escape[ord(x)] = '\\'+_reverse[x] @@ -166,7 +167,7 @@ class PathQuoting: @staticmethod def unescape_sequence(orig): seq = orig.group(1) - return PathQuoting._unescape[seq] if len(seq) == 1 else chr(int(seq, 8)) + return PathQuoting._unescape[seq] if len(seq) == 1 else bytes([int(seq, 8)]) @staticmethod def dequote(quoted_string): diff --git a/t/t9391/splice_repos.py b/t/t9391/splice_repos.py index c7834c7..133044e 100755 --- a/t/t9391/splice_repos.py +++ b/t/t9391/splice_repos.py @@ -34,7 +34,7 @@ class InterleaveRepositories: def weave_commit(self, commit): letter = re.match('Commit (.)', commit.message).group(1) - prev_letter = chr(ord(letter)-1) + prev_letter = bytes([ord(letter)-1]) # Splice in any extra commits needed if prev_letter in self.commit_map: diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index c7a2c57..da0cf89 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -21,7 +21,7 @@ import textwrap import git_filter_repo as fr def handle_progress(progress): - print("Decipher this: "+''.join(reversed(progress.message))) + print("Decipher this: "+bytes(reversed(progress.message))) def handle_checkpoint(checkpoint_object): # Flip a coin; see if we want to pass the checkpoint through. From 12602dae9c199125a79a26afa1883b3f53974e8a Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 14:47:34 -0700 Subject: [PATCH 15/17] filter-repo (python3): f.readline() instead of f.next() and StopIteration File iterators, at least when opened in binary mode, apparently operate differently in python3.
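A minimal sketch of the difference (illustrative only, not part of the patch): python2 file objects provide a .next() method that raises StopIteration at end-of-input, while python3 binary file objects only provide __next__(); the replacement idiom below, mirroring the new code in gather_data(), instead relies on readline() returning an empty bytestring at EOF:

    line = f.readline()   # b'' at EOF, rather than raising StopIteration
    if not line:
      raise SystemExit(_("Nothing to analyze; repository is empty."))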
Signed-off-by: Elijah Newren --- git-filter-repo | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index b694e8b..0348474 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -2244,25 +2244,23 @@ class RepoAnalyze(object): ' --date=short -M -t -c --raw --combined-all-paths') dtp = subprocess.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE) f = dtp.stdout - try: - line = f.next() - except StopIteration: + line = f.readline() + if not line: raise SystemExit(_("Nothing to analyze; repository is empty.")) cont = bool(line) graph = AncestryGraph() while cont: commit = line.rstrip() - parents = f.next().split() - date = f.next().rstrip() + parents = f.readline().split() + date = f.readline().rstrip() # We expect a blank line next; if we get a non-blank line then # this commit modified no files and we need to move on to the next. # If there is no line, we've reached end-of-input. - try: - line = f.next().rstrip() - cont = True - except StopIteration: + line = f.readline() + if not line: cont = False + line = line.rstrip() # If we haven't reached end of input, and we got a blank line meaning # a commit that has modified files, then get the file changes associated From 385b0586ca47109fd14f75dd27a8ed26d831582e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 15:00:42 -0700 Subject: [PATCH 16/17] filter-repo (python3): bytestr splicing and iterating is different Unlike how str works, if we grab an array index of a bytestr we get an integer (corresponding to the ASCII value) instead of a bytestr of length 1. Adjust code accordingly. Signed-off-by: Elijah Newren --- git-filter-repo | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 0348474..19742a1 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -181,11 +181,11 @@ class PathQuoting: def enquote(unquoted_string): # Option 1: Quoting when fast-export would: # pqsc = PathQuoting._special_chars - # if any(pqsc[ord(x)] for x in set(unquoted_string)): + # if any(pqsc[x] for x in set(unquoted_string)): # Option 2, perf hack: do minimal amount of quoting required by fast-import if unquoted_string.startswith('"') or '\n' in unquoted_string: pqe = PathQuoting._escape - return '"' + ''.join(pqe[ord(x)] for x in unquoted_string) + '"' + return '"' + ''.join(pqe[x] for x in unquoted_string) + '"' return unquoted_string class AncestryGraph(object): @@ -975,10 +975,10 @@ class FastExportFilter(object): of file-changes that fast-export will provide). 
""" filechange = None - changetype = self._currentline[0] + changetype = self._currentline[0:1] if changetype == 'M': (changetype, mode, idnum, path) = self._currentline.split(None, 3) - if idnum[0] == ':': + if idnum[0:1] == ':': idnum = idnum[1:] path = path.rstrip('\n') # We translate the idnum to our id system @@ -2136,7 +2136,7 @@ class RepoAnalyze(object): @staticmethod def handle_renames(stats, commit, change_types, filenames): for index, change_type in enumerate(change_types): - if change_type == 'R': + if change_type == ord(b'R'): oldname, newname = filenames[index], filenames[-1] RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname) RepoAnalyze.setup_or_update_rename_history(stats, commit, @@ -2780,9 +2780,9 @@ class RepoFilter(object): return True n = len(path_expression) if (pathname.startswith(path_expression) and - (path_expression[n-1] == '/' or + (path_expression[n-1:n] == '/' or len(pathname) == n or - pathname[n] == '/')): + pathname[n:n+1] == '/')): return True return False From 35052f673d314eae542926dce393d4b77fe4ff26 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Sat, 27 Apr 2019 15:18:59 -0700 Subject: [PATCH 17/17] filter-repo (python3): replace strings with bytestrings This is by far the largest python3 change; it consists basically of * using b'' instead of '' in lots of places * adding a .encode() if we really do work with a string but need to get it converted to a bytestring * replace uses of .format() with interpolation via the '%' operator, since bytestrings don't have a .format() method. Signed-off-by: Elijah Newren --- git-filter-repo | 636 ++++++++++++++------------- t/t9390-filter-repo.sh | 10 +- t/t9391/commit_info.py | 10 +- t/t9391/create_fast_export_output.py | 94 ++-- t/t9391/file_filter.py | 4 +- t/t9391/rename-master-to-develop.py | 4 +- t/t9391/splice_repos.py | 12 +- t/t9391/strip-cvs-keywords.py | 4 +- t/t9391/unusual.py | 10 +- t/t9392-python-callback.sh | 20 +- 10 files changed, 408 insertions(+), 396 deletions(-) diff --git a/git-filter-repo b/git-filter-repo index 19742a1..e66e27e 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -87,12 +87,12 @@ class FixedTimeZone(tzinfo): Fixed offset in minutes east from UTC. """ - tz_re = re.compile(r'^([-+]?)(\d\d)(\d\d)$') + tz_re = re.compile(br'^([-+]?)(\d\d)(\d\d)$') def __init__(self, offset_string): tzinfo.__init__(self) sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups() - factor = -1 if (sign and sign == '-') else 1 + factor = -1 if (sign and sign == b'-') else 1 self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm))) self._offset_string = offset_string @@ -112,8 +112,8 @@ def string_to_date(datestring): def date_to_string(dateobj): epoch = datetime.fromtimestamp(0, dateobj.tzinfo) - return('{} {}'.format(int(_timedelta_to_seconds(dateobj - epoch)), - dateobj.tzinfo.tzname(0))) + return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)), + dateobj.tzinfo.tzname(0))) def decode(bytestr): 'Try to convert bytestr to utf-8 for outputting as an error message.' 
@@ -147,21 +147,21 @@ def glob_to_regex(glob_bytestr): return regex.encode() class PathQuoting: - _unescape = {'a': '\a', - 'b': '\b', - 'f': '\f', - 'n': '\n', - 'r': '\r', - 't': '\t', - 'v': '\v', - '"': '"', - '\\':'\\'} - _unescape_re = re.compile(r'\\([a-z"\\]|[0-9]{3})') + _unescape = {b'a': b'\a', + b'b': b'\b', + b'f': b'\f', + b'n': b'\n', + b'r': b'\r', + b't': b'\t', + b'v': b'\v', + b'"': b'"', + b'\\':b'\\'} + _unescape_re = re.compile(br'\\([a-z"\\]|[0-9]{3})') _escape = [bytes([x]) for x in range(127)]+[ - '\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] + b'\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)] _reverse = dict(map(reversed, _unescape.items())) for x in _reverse: - _escape[ord(x)] = '\\'+_reverse[x] + _escape[ord(x)] = b'\\'+_reverse[x] _special_chars = [len(x) > 1 for x in _escape] @staticmethod @@ -171,8 +171,8 @@ class PathQuoting: @staticmethod def dequote(quoted_string): - if quoted_string.startswith('"'): - assert quoted_string.endswith('"') + if quoted_string.startswith(b'"'): + assert quoted_string.endswith(b'"') return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence, quoted_string[1:-1]) return quoted_string @@ -183,9 +183,9 @@ class PathQuoting: # pqsc = PathQuoting._special_chars # if any(pqsc[x] for x in set(unquoted_string)): # Option 2, perf hack: do minimal amount of quoting required by fast-import - if unquoted_string.startswith('"') or '\n' in unquoted_string: + if unquoted_string.startswith(b'"') or b'\n' in unquoted_string: pqe = PathQuoting._escape - return '"' + ''.join(pqe[x] for x in unquoted_string) + '"' + return b'"' + b''.join(pqe[x] for x in unquoted_string) + b'"' return unquoted_string class AncestryGraph(object): @@ -263,8 +263,8 @@ class MailmapInfo(object): self._parse_file(filename) def _parse_file(self, filename): - name_and_email_re = re.compile(r'(.*?)\s*<([^>]+)>\s*') - comment_re = re.compile(r'\s*#.*') + name_and_email_re = re.compile(br'(.*?)\s*<([^>]+)>\s*') + comment_re = re.compile(br'\s*#.*') if not os.access(filename, os.R_OK): raise SystemExit(_("Cannot read %s") % decode(filename)) with open(filename, 'br') as f: @@ -273,7 +273,7 @@ class MailmapInfo(object): count += 1 err = "Unparseable mailmap file: line #{} is bad: {}".format(count, line) # Remove comments - line = comment_re.sub('', line) + line = comment_re.sub(b'', line) # Remove leading and trailing whitespace line = line.strip() if not line: @@ -443,7 +443,8 @@ class _GitElement(object): output_lines = writeme.getvalue().splitlines() writeme.close() self.dumped = old_dumped - return "{}:\n {}".format(type(self).__name__, "\n ".join(output_lines)) + return b"%s:\n %s" % (type(self).__name__.encode(), + b"\n ".join(output_lines)) def skip(self, new_id=None): """ @@ -491,6 +492,7 @@ class Blob(_GitElementWithId): self.original_id = original_id # Stores the blob's data + assert(type(data) == bytes) self.data = data def dump(self, file_): @@ -499,10 +501,10 @@ class Blob(_GitElementWithId): """ self.dumped = 1 - file_.write('blob\n') - file_.write('mark :%d\n' % self.id) - file_.write('data %d\n%s' % (len(self.data), self.data)) - file_.write('\n') + file_.write(b'blob\n') + file_.write(b'mark :%d\n' % self.id) + file_.write(b'data %d\n%s' % (len(self.data), self.data)) + file_.write(b'\n') class Reset(_GitElement): @@ -530,10 +532,10 @@ class Reset(_GitElement): """ self.dumped = 1 - file_.write('reset %s\n' % self.ref) + file_.write(b'reset %s\n' % self.ref) if self.from_ref: - file_.write('from :%d\n' % self.from_ref) - 
file_.write('\n') + file_.write(b'from :%d\n' % self.from_ref) + file_.write(b'\n') class FileChanges(_GitElement): """ @@ -544,7 +546,10 @@ class FileChanges(_GitElement): def __init__(self, type_, filename, id_ = None, mode = None): _GitElement.__init__(self) - # Denote the type of file-change (M for modify, D for delete, etc) + # Denote the type of file-change (b'M' for modify, b'D' for delete, etc) + # We could + # assert(type(type_) == bytes) + # here but I don't just due to worries about performance overhead... self.type = type_ # Record the name of the file being changed @@ -557,15 +562,15 @@ class FileChanges(_GitElement): # blob_id is the id (mark) of the affected blob self.blob_id = None - # For 'M' file changes (modify), expect to have id and mode - if type_ == 'M': + # For b'M' file changes (modify), expect to have id and mode + if type_ == b'M': if mode is None: raise SystemExit(_("file mode and idnum needed for %s") % filename) # pragma: no cover self.mode = mode self.blob_id = id_ - # For 'R' file changes (rename), expect to have newname as third arg - elif type_ == 'R': # pragma: no cover (now avoid fast-export renames) + # For b'R' file changes (rename), expect to have newname as third arg + elif type_ == b'R': # pragma: no cover (now avoid fast-export renames) if id_ is None: raise SystemExit(_("new name needed for rename of %s") % filename) self.filename = (self.filename, id_) @@ -574,17 +579,17 @@ class FileChanges(_GitElement): """ Write this file-change element to a file """ - skipped_blob = (self.type == 'M' and self.blob_id is None) + skipped_blob = (self.type == b'M' and self.blob_id is None) if skipped_blob: return self.dumped = 1 quoted_filename = PathQuoting.enquote(self.filename) - if self.type == 'M' and isinstance(self.blob_id, int): - file_.write('M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'M': - file_.write('M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) - elif self.type == 'D': - file_.write('D %s\n' % quoted_filename) + if self.type == b'M' and isinstance(self.blob_id, int): + file_.write(b'M %s :%d %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'M': + file_.write(b'M %s %s %s\n' % (self.mode, self.blob_id, quoted_filename)) + elif self.type == b'D': + file_.write(b'D %s\n' % quoted_filename) else: raise SystemExit(_("Unhandled filechange type: %s") % self.type) # pragma: no cover @@ -648,16 +653,16 @@ class Commit(_GitElementWithId): # Make output to fast-import slightly easier for humans to read if the # message has no trailing newline of its own; cosmetic, but a nice touch... 
- extra_newline = '\n' - if self.message.endswith('\n') or not (self.parents or self.file_changes): - extra_newline = '' + extra_newline = b'\n' + if self.message.endswith(b'\n') or not (self.parents or self.file_changes): + extra_newline = b'' - file_.write(('commit {}\n' - 'mark :{}\n' - 'author {} <{}> {}\n' - 'committer {} <{}> {}\n' - 'data {}\n{}{}' - ).format( + file_.write((b'commit %s\n' + b'mark :%d\n' + b'author %s <%s> %s\n' + b'committer %s <%s> %s\n' + b'data %d\n%s%s' + ) % ( self.branch, self.id, self.author_name, self.author_email, self.author_date, self.committer_name, self.committer_email, self.committer_date, @@ -665,16 +670,18 @@ class Commit(_GitElementWithId): extra_newline) ) for i, parent in enumerate(self.parents): - mark = ':' if isinstance(parent, int) else '' - file_.write('from ' if i==0 else 'merge ') - file_.write('{}{}\n'.format(mark, parent)) + file_.write(b'from ' if i==0 else b'merge ') + if isinstance(parent, int): + file_.write(b':%d\n' % parent) + else: + file_.write(b'%s\n' % parent) for change in self.file_changes: change.dump(file_) if not self.parents and not self.file_changes: # Workaround a bug in pre-git-2.22 versions of fast-import with # the get-mark directive. - file_.write('\n') - file_.write('\n') + file_.write(b'\n') + file_.write(b'\n') def first_parent(self): """ @@ -729,15 +736,15 @@ class Tag(_GitElement): self.dumped = 1 - file_.write('tag %s\n' % self.ref) - mark = ':' if isinstance(self.from_ref, int) else '' - file_.write('from {}{}\n'.format(mark, self.from_ref)) + file_.write(b'tag %s\n' % self.ref) + markfmt = b'from :%d\n' if isinstance(self.from_ref, int) else 'from %s\n' + file_.write(markfmt % self.from_ref) if self.tagger_name: - file_.write('tagger %s <%s> ' % (self.tagger_name, self.tagger_email)) + file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email)) file_.write(self.tagger_date) - file_.write('\n') - file_.write('data %d\n%s' % (len(self.message), self.message)) - file_.write('\n') + file_.write(b'\n') + file_.write(b'data %d\n%s' % (len(self.message), self.message)) + file_.write(b'\n') class Progress(_GitElement): """ @@ -761,8 +768,8 @@ class Progress(_GitElement): """ self.dumped = 1 - file_.write('progress %s\n' % self.message) - file_.write('\n') + file_.write(b'progress %s\n' % self.message) + file_.write(b'\n') class Checkpoint(_GitElement): """ @@ -784,8 +791,8 @@ class Checkpoint(_GitElement): """ self.dumped = 1 - file_.write('checkpoint\n') - file_.write('\n') + file_.write(b'checkpoint\n') + file_.write(b'\n') class LiteralCommand(_GitElement): """ @@ -910,20 +917,20 @@ class FastExportFilter(object): self._files_tweaked = set() # Compile some regexes and cache those - self._mark_re = re.compile(r'mark :(\d+)\n$') + self._mark_re = re.compile(br'mark :(\d+)\n$') self._parent_regexes = {} - parent_regex_rules = ('{} :(\d+)\n$', '{} ([0-9a-f]{{40}})\n') - for parent_refname in ('from', 'merge'): - ans = [re.compile(x.format(parent_refname)) for x in parent_regex_rules] + parent_regex_rules = (b' :(\d+)\n$', b' ([0-9a-f]{40})\n') + for parent_refname in (b'from', b'merge'): + ans = [re.compile(parent_refname+x) for x in parent_regex_rules] self._parent_regexes[parent_refname] = ans - self._quoted_string_re = re.compile(r'"(?:[^"\\]|\\.)*"') + self._quoted_string_re = re.compile(br'"(?:[^"\\]|\\.)*"') self._refline_regexes = {} - for refline_name in ('reset', 'commit', 'tag', 'progress'): - self._refline_regexes[refline_name] = re.compile(refline_name+' (.*)\n$') + for refline_name in 
(b'reset', b'commit', b'tag', b'progress'): + self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\n$') self._user_regexes = {} - for user in ('author', 'committer', 'tagger'): - self._user_regexes[user] = re.compile(user + ' (.*?) <(.*?)> (.*)\n$') - self._hash_re = re.compile(r'(\b[0-9a-f]{7,40}\b)') + for user in (b'author', b'committer', b'tagger'): + self._user_regexes[user] = re.compile(user + b' (.*?) <(.*?)> (.*)\n$') + self._hash_re = re.compile(br'(\b[0-9a-f]{7,40}\b)') def _advance_currentline(self): """ @@ -971,51 +978,51 @@ class FastExportFilter(object): """ If the current line contains a file-change object, then parse it and advance the current line; otherwise return None. We only care - about file changes of type 'M' and 'D' (these are the only types + about file changes of type b'M' and b'D' (these are the only types of file-changes that fast-export will provide). """ filechange = None changetype = self._currentline[0:1] - if changetype == 'M': + if changetype == b'M': (changetype, mode, idnum, path) = self._currentline.split(None, 3) - if idnum[0:1] == ':': + if idnum[0:1] == b':': idnum = idnum[1:] - path = path.rstrip('\n') + path = path.rstrip(b'\n') # We translate the idnum to our id system if len(idnum) != 40: idnum = _IDS.translate( int(idnum) ) if idnum is not None: - if path.startswith('"'): + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('M', path, idnum, mode) + filechange = FileChanges(b'M', path, idnum, mode) else: - filechange = 'skipped' + filechange = b'skipped' self._advance_currentline() - elif changetype == 'D': + elif changetype == b'D': (changetype, path) = self._currentline.split(None, 1) - path = path.rstrip('\n') - if path.startswith('"'): + path = path.rstrip(b'\n') + if path.startswith(b'"'): path = PathQuoting.dequote(path) - filechange = FileChanges('D', path) + filechange = FileChanges(b'D', path) self._advance_currentline() - elif changetype == 'R': # pragma: no cover (now avoid fast-export renames) + elif changetype == b'R': # pragma: no cover (now avoid fast-export renames) rest = self._currentline[2:-1] - if rest.startswith('"'): + if rest.startswith(b'"'): m = self._quoted_string_re.match(rest) if not m: raise SystemExit(_("Couldn't parse rename source")) orig = PathQuoting.dequote(m.group(0)) new = rest[m.end()+1:] else: - orig, new = rest.split(' ', 1) - if new.startswith('"'): + orig, new = rest.split(b' ', 1) + if new.startswith(b'"'): new = PathQuoting.dequote(new) - filechange = FileChanges('R', orig, new) + filechange = FileChanges(b'R', orig, new) self._advance_currentline() return filechange def _parse_original_id(self): - original_id = self._currentline[len('original-oid '):].rstrip() + original_id = self._currentline[len(b'original-oid '):].rstrip() self._advance_currentline() return original_id @@ -1049,8 +1056,8 @@ class FastExportFilter(object): # fast-import will not choke on. Let's do that. Note that +051800 # seems to be the only weird timezone found in the wild, by me or some # other posts google returned on the subject... - if when.endswith('+051800'): - when = when[0:-7]+'+0261' + if when.endswith(b'+051800'): + when = when[0:-7]+b'+0261' self._advance_currentline() return (name, email, when) @@ -1061,11 +1068,11 @@ class FastExportFilter(object): the data. 
""" fields = self._currentline.split() - assert fields[0] == 'data' + assert fields[0] == b'data' size = int(fields[1]) data = self._input.read(size) self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() return data @@ -1082,11 +1089,11 @@ class FastExportFilter(object): id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); data = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the blob @@ -1117,9 +1124,9 @@ class FastExportFilter(object): the callback). """ # Parse the Reset - ref = self._parse_ref_line('reset') - ignoreme, from_ref = self._parse_optional_parent_ref('from') - if self._currentline == '\n': + ref = self._parse_ref_line(b'reset') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') + if self._currentline == b'\n': self._advance_currentline() # fast-export likes to print extraneous resets that serve no purpose. @@ -1342,19 +1349,19 @@ class FastExportFilter(object): for change in commit.file_changes: parent = new_1st_parent or commit.parents[0] # exists due to above checks quoted_filename = PathQuoting.enquote(change.filename) - self._output.write("ls :{} {}\n".format(parent, quoted_filename)) + self._output.write(b"ls :%d %s\n" % (parent, quoted_filename)) self._output.flush() parent_version = fi_output.readline().split() - if change.type == 'D': - if parent_version != ['missing', quoted_filename]: + if change.type == b'D': + if parent_version != [b'missing', quoted_filename]: return False else: blob_sha = change.blob_id if isinstance(change.blob_id, int): - self._output.write("get-mark :{}\n".format(change.blob_id)) + self._output.write(b"get-mark :%d\n" % change.blob_id) self._output.flush() blob_sha = fi_output.readline().rstrip() - if parent_version != [change.mode, 'blob', blob_sha, quoted_filename]: + if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]: return False return True @@ -1364,7 +1371,7 @@ class FastExportFilter(object): # Record the mapping of old commit hash to new one if commit.original_id and self._fast_import_pipes: fi_input, fi_output = self._fast_import_pipes - self._output.write("get-mark :{}\n".format(commit.id)) + self._output.write(b"get-mark :%d\n" % commit.id) self._output.flush() orig_id = commit.original_id self._commit_short_old_hashes[orig_id[0:7]].add(orig_id) @@ -1390,19 +1397,19 @@ class FastExportFilter(object): """ # Parse the Commit. This may look involved, but it's pretty simple; it only # looks bad because a commit object contains many pieces of data. 
- branch = self._parse_ref_line('commit') + branch = self._parse_ref_line(b'commit') id_ = self._parse_optional_mark() original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); author_name = None - if self._currentline.startswith('author'): - (author_name, author_email, author_date) = self._parse_user('author') + if self._currentline.startswith(b'author'): + (author_name, author_email, author_date) = self._parse_user(b'author') (committer_name, committer_email, committer_date) = \ - self._parse_user('committer') + self._parse_user(b'committer') if not author_name: (author_name, author_email, author_date) = \ @@ -1411,12 +1418,12 @@ class FastExportFilter(object): commit_msg = self._parse_data() commit_msg = self._hash_re.sub(self._translate_commit_hash, commit_msg) - pinfo = [self._parse_optional_parent_ref('from')] + pinfo = [self._parse_optional_parent_ref(b'from')] # Due to empty pruning, we can have real 'from' and 'merge' lines that # due to commit rewriting map to a parent of None. We need to record # 'from' if its non-None, and we need to parse all 'merge' lines. - while self._currentline.startswith('merge '): - pinfo.append(self._parse_optional_parent_ref('merge')) + while self._currentline.startswith(b'merge '): + pinfo.append(self._parse_optional_parent_ref(b'merge')) orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)] # No parents is oddly represented as [None] instead of [], due to the @@ -1434,10 +1441,10 @@ class FastExportFilter(object): file_change = self._parse_optional_filechange() had_file_changes = file_change is not None while file_change: - if not (type(file_change) == str and file_change == 'skipped'): + if not (type(file_change) == bytes and file_change == b'skipped'): file_changes.append(file_change) file_change = self._parse_optional_filechange() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Okay, now we can finally create the Commit object @@ -1510,18 +1517,18 @@ class FastExportFilter(object): the callback). """ # Parse the Tag - tag = self._parse_ref_line('tag') - ignoreme, from_ref = self._parse_optional_parent_ref('from') + tag = self._parse_ref_line(b'tag') + ignoreme, from_ref = self._parse_optional_parent_ref(b'from') original_id = None - if self._currentline.startswith('original-oid'): + if self._currentline.startswith(b'original-oid'): original_id = self._parse_original_id(); tagger_name, tagger_email, tagger_date = None, None, None - if self._currentline.startswith('tagger'): - (tagger_name, tagger_email, tagger_date) = self._parse_user('tagger') + if self._currentline.startswith(b'tagger'): + (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger') tag_msg = self._parse_data() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the tag @@ -1544,7 +1551,7 @@ class FastExportFilter(object): tag.dump(self._output) # Record the fact that this tag was seen so we don't nuke it as part # of refs_to_nuke. - full_ref = 'refs/tags/{}'.format(tag.ref) + full_ref = b'refs/tags/' + tag.ref self._seen_refs[full_ref] = None def _parse_progress(self): @@ -1556,8 +1563,8 @@ class FastExportFilter(object): everything else is done (unless it has been skipped by the callback). 
""" # Parse the Progress - message = self._parse_ref_line('progress') - if self._currentline == '\n': + message = self._parse_ref_line(b'progress') + if self._currentline == b'\n': self._advance_currentline() # Create the progress message @@ -1585,7 +1592,7 @@ class FastExportFilter(object): """ # Parse the Checkpoint self._advance_currentline() - if self._currentline == '\n': + if self._currentline == b'\n': self._advance_currentline() # Create the checkpoint @@ -1632,16 +1639,17 @@ class FastExportFilter(object): reset.dump(self._output) def record_metadata(self, metadata_dir, orig_refs, refs_nuked): - deleted_hash = '0'*40 + deleted_hash = b'0'*40 self._flush_renames() - with open(os.path.join(metadata_dir, 'commit-map'), 'bw') as f: - f.write("%-40s %s\n" % (_("old"), _("new"))) + with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f: + f.write(("%-40s %s\n" % (_("old"), _("new"))).encode()) for (old,new) in self._commit_renames.items(): - f.write('{} {}\n'.format(old, new if new != None else deleted_hash)) + msg = b'%s %s\n' % (old, new if new != None else deleted_hash) + f.write(msg) batch_check_process = None - batch_check_output_re = re.compile('^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') - with open(os.path.join(metadata_dir, 'ref-map'), 'bw') as f: + batch_check_output_re = re.compile(b'^([0-9a-f]{40}) ([a-z]+) ([0-9]+)$') + with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f: for refname, old_hash in orig_refs.items(): if refname in refs_nuked: new_hash = deleted_hash @@ -1655,22 +1663,22 @@ class FastExportFilter(object): stdin=subprocess.PIPE, stdout=subprocess.PIPE, cwd=self._repo_working_dir) - batch_check_process.stdin.write(refname+"\n") + batch_check_process.stdin.write(refname+b"\n") batch_check_process.stdin.flush() line = batch_check_process.stdout.readline() m = batch_check_output_re.match(line) - if not m or m.group(2) != 'tag': + if not m or m.group(2) != b'tag': raise SystemExit(_("Failed to find new id for %(refname)s " "(old id was %(old_hash)s)") % ({'refname': refname, 'old_hash': old_hash}) ) # pragma: no cover new_hash = m.group(1) - f.write('{} {} {}\n'.format(old_hash, new_hash, refname)) + f.write(b'%s %s %s\n' % (old_hash, new_hash, refname)) if batch_check_process: batch_check_process.stdin.close() batch_check_process.wait() - with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'bw') as f: + with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f: issues_found = False if self._commits_no_longer_merges: issues_found = True @@ -1680,10 +1688,10 @@ class FastExportFilter(object): are now regular commits; they likely have suboptimal commit messages (e.g. "Merge branch next into master"). Original commit hash on the left, commit hash after filtering/rewriting on the right: - ''')[1:])) + ''')[1:]).encode()) for oldhash, newhash in self._commits_no_longer_merges: - f.write(' {} {}\n'.format(oldhash, newhash)) - f.write('\n') + f.write(' {} {}\n'.format(oldhash, newhash).encode()) + f.write(b'\n') if self._commits_referenced_but_removed: issues_found = True @@ -1691,16 +1699,16 @@ class FastExportFilter(object): The following commits were filtered out, but referenced in another commit message. 
The reference to the now-nonexistent commit hash (or a substring thereof) was left as-is in any commit messages: - ''')[1:])) + ''')[1:]).encode()) for bad_commit_reference in self._commits_referenced_but_removed: - f.write(' {}\n'.format(bad_commit_reference)) - f.write('\n') + f.write(' {}\n'.format(bad_commit_reference).encode()) + f.write(b'\n') if not issues_found: - f.write(_("No filtering problems encountered.")) + f.write(_("No filtering problems encountered.\n").encode()) - with open(os.path.join(metadata_dir, 'already_ran'), 'bw') as f: - f.write(_("This file exists to allow you to filter again without --force.")) + with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f: + f.write(_("This file exists to allow you to filter again without --force.\n").encode()) def get_seen_refs(self): return self._seen_refs.keys() @@ -1718,30 +1726,30 @@ class FastExportFilter(object): # Run over the input and do the filtering self._advance_currentline() while self._currentline: - if self._currentline.startswith('blob'): + if self._currentline.startswith(b'blob'): self._parse_blob() - elif self._currentline.startswith('reset'): + elif self._currentline.startswith(b'reset'): self._parse_reset() - elif self._currentline.startswith('commit'): + elif self._currentline.startswith(b'commit'): self._parse_commit() - elif self._currentline.startswith('tag'): + elif self._currentline.startswith(b'tag'): self._parse_tag() - elif self._currentline.startswith('progress'): + elif self._currentline.startswith(b'progress'): self._parse_progress() - elif self._currentline.startswith('checkpoint'): + elif self._currentline.startswith(b'checkpoint'): self._parse_checkpoint() - elif self._currentline.startswith('feature'): + elif self._currentline.startswith(b'feature'): self._parse_literal_command() - elif self._currentline.startswith('option'): + elif self._currentline.startswith(b'option'): self._parse_literal_command() - elif self._currentline.startswith('done'): + elif self._currentline.startswith(b'done'): self._handle_final_commands() self._parse_literal_command() - elif self._currentline.startswith('#'): + elif self._currentline.startswith(b'#'): self._parse_literal_command() - elif self._currentline.startswith('get-mark') or \ - self._currentline.startswith('cat-blob') or \ - self._currentline.startswith('ls'): + elif self._currentline.startswith(b'get-mark') or \ + self._currentline.startswith(b'cat-blob') or \ + self._currentline.startswith(b'ls'): raise SystemExit(_("Unsupported command: '%s'") % self._currentline) else: raise SystemExit(_("Could not parse line: '%s'") % self._currentline) @@ -1798,13 +1806,13 @@ class GitUtils(object): def is_repository_bare(repo_working_dir): out = subprocess.check_output('git rev-parse --is-bare-repository'.split(), cwd=repo_working_dir) - return (out.strip() == 'true') + return (out.strip() == b'true') @staticmethod def determine_git_dir(repo_working_dir): d = subprocess.check_output('git rev-parse --git-dir'.split(), cwd=repo_working_dir).strip() - if repo_working_dir=='.' or d.startswith('/'): + if repo_working_dir==b'.' 
or d.startswith(b'/'): return d return os.path.join(repo_working_dir, d) @@ -1841,12 +1849,12 @@ class FilteringOptions(object): def __call__(self, parser, namespace, values, option_string=None): af = FilteringOptions.AppendFilter(dest='path_changes', option_strings=None) - dirname = values if values[-1] == '/' else values+'/' + dirname = values if values[-1] == b'/' else values+b'/' if option_string == '--subdirectory-filter': af(parser, namespace, dirname, '--path-match') - af(parser, namespace, dirname+':', '--path-rename') + af(parser, namespace, dirname+b':', '--path-rename') elif option_string == '--to-subdirectory-filter': - af(parser, namespace, ':'+dirname, '--path-rename') + af(parser, namespace, b':'+dirname, '--path-rename') else: raise SystemExit(_("Error: HelperFilter given invalid option_string: %s") % option_string) # pragma: no cover @@ -2047,7 +2055,7 @@ class FilteringOptions(object): stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p.wait() output = p.stdout.read() - if '--combined-all-paths' not in output: + if b'--combined-all-paths' not in output: raise SystemExit(_("Error: need a version of git whose diff-tree command " "has the --combined-all-paths option")) # pragma: no cover @@ -2057,24 +2065,24 @@ class FilteringOptions(object): replace_regexes = [] with open(filename, 'br') as f: for line in f: - line = line.rstrip('\r\n') + line = line.rstrip(b'\r\n') # Determine the replacement - replacement = '***REMOVED***' - if '==>' in line: - line, replacement = line.rsplit('==>', 1) + replacement = b'***REMOVED***' + if b'==>' in line: + line, replacement = line.rsplit(b'==>', 1) # See if we need to match via regex regex = None - if line.startswith('regex:'): + if line.startswith(b'regex:'): regex = line[6:] - elif line.startswith('glob:'): + elif line.startswith(b'glob:'): regex = glob_to_regex(line[5:]) if regex: replace_regexes.append((re.compile(regex), replacement)) else: # Otherwise, find the literal we need to replace - if line.startswith('literal:'): + if line.startswith(b'literal:'): line = line[8:] if not line: continue @@ -2149,7 +2157,7 @@ class RepoAnalyze(object): # Figure out kind of deletions to undo for this file, and update lists # of all-names-by-sha and all-filenames delmode = 'tree_deletions' - if mode != '040000': + if mode != b'040000': delmode = 'file_deletions' stats['names'][sha].add(filename) stats['allnames'].add(filename) @@ -2179,22 +2187,22 @@ class RepoAnalyze(object): graph.add_commit_and_parents(commit, parents) for change in file_changes: modes, shas, change_types, filenames = change - if len(parents) == 1 and change_types.startswith('R'): - change_types = 'R' # remove the rename score; we don't care - if modes[-1] == '160000': + if len(parents) == 1 and change_types.startswith(b'R'): + change_types = b'R' # remove the rename score; we don't care + if modes[-1] == b'160000': continue - elif modes[-1] == '000000': + elif modes[-1] == b'000000': # Track when files/directories are deleted for f in RepoAnalyze.equiv_class(stats, filenames[-1]): - if any(x == '040000' for x in modes[0:-1]): + if any(x == b'040000' for x in modes[0:-1]): stats['tree_deletions'][f] = date else: stats['file_deletions'][f] = date - elif change_types.strip('AMT') == '': + elif change_types.strip(b'AMT') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) - elif modes[-1] == '040000' and change_types.strip('RAM') == '': + elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, 
modes, shas, filenames) - elif change_types.strip('RAM') == '': + elif change_types.strip(b'RAM') == b'': RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames) RepoAnalyze.handle_renames(stats, commit, change_types, filenames) else: @@ -2219,7 +2227,7 @@ class RepoAnalyze(object): for line in cf.stdout: sha, objtype, objsize, objdisksize = line.split() objsize, objdisksize = int(objsize), int(objdisksize) - if objtype == 'blob': + if objtype == b'blob': unpacked_size[sha] = objsize packed_size[sha] = objdisksize num_blobs += 1 @@ -2269,17 +2277,17 @@ class RepoAnalyze(object): if cont and not line: cont = False for line in f: - if not line.startswith(':'): + if not line.startswith(b':'): cont = True break n = 1+max(1, len(parents)) - assert line.startswith(':'*(n-1)) + assert line.startswith(b':'*(n-1)) relevant = line[n-1:-1] splits = relevant.split(None, n) modes = splits[0:n] splits = splits[n].split(None, n) shas = splits[0:n] - splits = splits[n].split('\t') + splits = splits[n].split(b'\t') change_types = splits[0] filenames = [PathQuoting.dequote(x) for x in splits[1:]] file_changes.append([modes, shas, change_types, filenames]) @@ -2304,13 +2312,13 @@ class RepoAnalyze(object): @staticmethod def write_report(reportdir, stats): def datestr(datetimestr): - return datetimestr if datetimestr else _('') + return datetimestr if datetimestr else _('').encode() def dirnames(path): while True: path = os.path.dirname(path) yield path - if path == '': + if path == b'': break # Compute aggregate size information for paths, extensions, and dirs @@ -2352,27 +2360,27 @@ class RepoAnalyze(object): for name in dir_size['packed']: dir_deleted_data[name] = stats['tree_deletions'].get(name, None) - with open(os.path.join(reportdir, "README"), 'bw') as f: + with open(os.path.join(reportdir, b"README"), 'bw') as f: # Give a basic overview of this file - f.write("== %s ==\n" % _("Overall Statistics")) - f.write(" %s: %d\n" % (_("Number of commits"), - stats['num_commits'])) - f.write(" %s: %d\n" % (_("Number of filenames"), - len(path_size['packed']))) - f.write(" %s: %d\n" % (_("Number of directories"), - len(dir_size['packed']))) - f.write(" %s: %d\n" % (_("Number of file extensions"), - len(ext_size['packed']))) - f.write("\n") - f.write(" %s: %d\n" % (_("Total unpacked size (bytes)"), - total_size['unpacked'])) - f.write(" %s: %d\n" % (_("Total packed size (bytes)"), - total_size['packed'])) - f.write("\n") + f.write(b"== %s ==\n" % _("Overall Statistics").encode()) + f.write((" %s: %d\n" % (_("Number of commits"), + stats['num_commits'])).encode()) + f.write((" %s: %d\n" % (_("Number of filenames"), + len(path_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of directories"), + len(dir_size['packed']))).encode()) + f.write((" %s: %d\n" % (_("Number of file extensions"), + len(ext_size['packed']))).encode()) + f.write(b"\n") + f.write((" %s: %d\n" % (_("Total unpacked size (bytes)"), + total_size['unpacked'])).encode()) + f.write((" %s: %d\n" % (_("Total packed size (bytes)"), + total_size['packed'])).encode()) + f.write(b"\n") # Mention issues with the report - f.write("== %s ==\n" % _("Caveats")) - f.write("=== %s ===\n" % _("Sizes")) + f.write(("== %s ==\n" % _("Caveats")).encode()) + f.write(("=== %s ===\n" % _("Sizes")).encode()) f.write(textwrap.dedent(_(""" Packed size represents what size your repository would be if no trees, commits, tags, or other metadata were included (though it may @@ -2400,9 +2408,9 @@ class RepoAnalyze(object): ever reverted to a previous 
version's contents, the previous version's size will be counted multiple times in this analysis, even though git will only store it once. - """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Deletions")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Deletions")).encode()) f.write(textwrap.dedent(_(""" Whether a file is deleted is not a binary quality, since it can be deleted on some branches but still exist in others. Also, it might @@ -2418,9 +2426,9 @@ class RepoAnalyze(object): stream that mentions the file lists it as deleted. This makes it dependent on topological ordering, but generally gives the "right" answer. - """)[1:])) - f.write("\n") - f.write("=== %s ===\n" % _("Renames")) + """)[1:]).encode()) + f.write(b"\n") + f.write(("=== %s ===\n" % _("Renames")).encode()) f.write(textwrap.dedent(_(""" Renames share the same non-binary nature that deletions do, plus additional challenges: @@ -2436,101 +2444,105 @@ class RepoAnalyze(object): * The ability for users to rename files differently in different branches means that our chains of renames will not necessarily be linear but may branch out. - """)[1:])) - f.write("\n") + """)[1:]).encode()) + f.write(b"\n") # Equivalence classes for names, so if folks only want to keep a # certain set of paths, they know the old names they want to include # too. - with open(os.path.join(reportdir, "renames.txt"), 'bw') as f: + with open(os.path.join(reportdir, b"renames.txt"), 'bw') as f: seen = set() for pathname,equiv_group in sorted(stats['equivalence'].items(), key=lambda x:(x[1], x[0])): if equiv_group in seen: continue seen.add(equiv_group) - f.write("{} ->\n ".format(decode(equiv_group[0])) + + f.write(("{} ->\n ".format(decode(equiv_group[0])) + "\n ".join(decode(x) for x in equiv_group[1:]) + - "\n") + "\n").encode()) # List directories in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "directories-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) + with open(os.path.join(reportdir, b"directories-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted directories by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (dir_deleted_data[dirname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _('').encode())) - with open(os.path.join(reportdir, "directories-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All directories by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, directory name\n")) + with open(os.path.join(reportdir, b"directories-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All directories by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, directory name\n") + f.write(msg.encode()) for dirname, size in sorted(dir_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(dir_size['unpacked'][dirname], - size, - datestr(dir_deleted_data[dirname]), - dirname or _(""))) + 
f.write(b" %10d %10d %-10s %s\n" % (dir_size['unpacked'][dirname], + size, + datestr(dir_deleted_data[dirname]), + dirname or _("").encode())) # List extensions in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "extensions-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) + with open(os.path.join(reportdir, b"extensions-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted extensions by reverse size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): if (ext_deleted_data[extname]): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) - with open(os.path.join(reportdir, "extensions-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All extensions by reverse size")) - f.write(_("Format: unpacked size, packed size, date deleted, extension name\n")) + with open(os.path.join(reportdir, b"extensions-all-sizes.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("All extensions by reverse size")).encode()) + msg = _("Format: unpacked size, packed size, date deleted, extension name\n") + f.write(msg.encode()) for extname, size in sorted(ext_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(ext_size['unpacked'][extname], - size, - datestr(ext_deleted_data[extname]), - extname or _(''))) + f.write(b" %10d %10d %-10s %s\n" % (ext_size['unpacked'][extname], + size, + datestr(ext_deleted_data[extname]), + extname or _('').encode())) # List files in reverse sorted order of unpacked size - with open(os.path.join(reportdir, "path-deleted-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Deleted paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, path name(s)\n")) + with open(os.path.join(reportdir, b"path-deleted-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("Deleted paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, path name(s)\n") + f.write(msg.encode()) for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = stats['file_deletions'].get(pathname, None) if when: - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) - with open(os.path.join(reportdir, "path-all-sizes.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("All paths by reverse accumulated size")) - f.write(_("Format: unpacked size, packed size, date deleted, pathectory name\n")) + with open(os.path.join(reportdir, b"path-all-sizes.txt"), 'bw') as f: + msg = "=== %s ===\n" % _("All paths by reverse accumulated size") + f.write(msg.encode()) + msg = _("Format: unpacked size, packed size, date deleted, pathectory name\n") + f.write(msg.encode()) for pathname, size in sorted(path_size['packed'].items(), key=lambda x:(x[1],x[0]), reverse=True): when = 
stats['file_deletions'].get(pathname, None) - f.write(" {:10d} {:10d} {:10s} {}\n" - .format(path_size['unpacked'][pathname], - size, - datestr(when), - pathname)) + f.write(b" %10d %10d %-10s %s\n" % (path_size['unpacked'][pathname], + size, + datestr(when), + pathname)) # List of filenames and sizes in descending order - with open(os.path.join(reportdir, "blob-shas-and-paths.txt"), 'bw') as f: - f.write("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")) - f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n")) + with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f: + f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode()) + f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode()) for sha, size in sorted(stats['packed_size'].items(), key=lambda x:(x[1],x[0]), reverse=True): if sha not in stats['names']: @@ -2541,21 +2553,21 @@ class RepoAnalyze(object): if len(names_with_sha) == 1: names_with_sha = names_with_sha.pop() else: - names_with_sha = sorted(list(names_with_sha)) - f.write(" {} {:10d} {:10d} {}\n".format(sha, - stats['unpacked_size'][sha], - size, - names_with_sha)) + names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']' + f.write(b" %s %10d %10d %s\n" % (sha, + stats['unpacked_size'][sha], + size, + names_with_sha)) @staticmethod def run(args): - git_dir = GitUtils.determine_git_dir('.') + git_dir = GitUtils.determine_git_dir(b'.') # Create the report directory as necessary - results_tmp_dir = os.path.join(git_dir, 'filter-repo') + results_tmp_dir = os.path.join(git_dir, b'filter-repo') if not os.path.isdir(results_tmp_dir): os.mkdir(results_tmp_dir) - reportdir = os.path.join(results_tmp_dir, "analysis") + reportdir = os.path.join(results_tmp_dir, b"analysis") if not args.force and os.path.isdir(reportdir): shutil.rmtree(reportdir) os.mkdir(reportdir) @@ -2693,7 +2705,7 @@ class RepoFilter(object): # Do sanity checks from the correct directory tmp_dir = self.results_tmp_dir(create_if_missing=False) if not self._args.force and \ - not os.path.isfile(os.path.join(tmp_dir, 'already_ran')): + not os.path.isfile(os.path.join(tmp_dir, b'already_ran')): cwd = os.getcwd() os.chdir(target_working_dir) RepoFilter.sanity_check(self._orig_refs, is_bare) @@ -2710,27 +2722,27 @@ class RepoFilter(object): # Make sure repo is fully packed, just like a fresh clone would be output = subprocess.check_output('git count-objects -v'.split()) - stats = dict(x.split(': ') for x in output.splitlines()) - num_packs = int(stats['packs']) - if stats['count'] != '0' or num_packs > 1: + stats = dict(x.split(b': ') for x in output.splitlines()) + num_packs = int(stats[b'packs']) + if stats[b'count'] != b'0' or num_packs > 1: abort(_("expected freshly packed repo")) # Make sure there is precisely one remote, named "origin"...or that this # is a new bare repo with no packs and no remotes output = subprocess.check_output('git remote'.split()).strip() - if not (output == "origin" or (num_packs == 0 and not output)): + if not (output == b"origin" or (num_packs == 0 and not output)): abort(_("expected one remote, origin")) # Avoid letting people running with weird setups and overwriting GIT_DIR # elsewhere - git_dir = GitUtils.determine_git_dir('.') - if is_bare and git_dir != '.': + git_dir = GitUtils.determine_git_dir(b'.') + if is_bare and git_dir != b'.': abort(_("GIT_DIR must be .")) - elif not is_bare and git_dir != '.git': + elif not is_bare 
and git_dir != b'.git': abort(_("GIT_DIR must be .git")) # Make sure that all reflogs have precisely one entry - reflog_dir=os.path.join(git_dir, 'logs') + reflog_dir=os.path.join(git_dir, b'logs') for root, dirs, files in os.walk(reflog_dir): for filename in files: pathname = os.path.join(root, filename) @@ -2741,7 +2753,7 @@ class RepoFilter(object): decode(shortpath)) # Make sure there are no stashed changes - if 'refs/stash' in refs: + if b'refs/stash' in refs: abort(_("has stashed changes")) # Do extra checks in non-bare repos @@ -2756,9 +2768,9 @@ class RepoFilter(object): # Avoid unpushed changes for refname, rev in refs.items(): - if not refname.startswith('refs/heads/'): + if not refname.startswith(b'refs/heads/'): continue - origin_ref = refname.replace('refs/heads/', 'refs/remotes/origin/') + origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/') if origin_ref not in refs: abort(_('%s exists, but %s not found') % (decode(refname), decode(origin_ref))) @@ -2776,13 +2788,13 @@ class RepoFilter(object): def tweak_commit(self, commit): def filename_matches(path_expression, pathname): - if path_expression == '': + if path_expression == b'': return True n = len(path_expression) if (pathname.startswith(path_expression) and - (path_expression[n-1:n] == '/' or + (path_expression[n-1:n] == b'/' or len(pathname) == n or - pathname[n:n+1] == '/')): + pathname[n:n+1] == b'/')): return True return False @@ -2798,7 +2810,7 @@ class RepoFilter(object): if match_type == 'regex' and path_exp.search(pathname): wanted = True elif mod_type == 'rename': - old_exp, new_exp = path_exp.split(':') + old_exp, new_exp = path_exp.split(b':') assert match_type in ('prefix',) if match_type == 'prefix' and pathname.startswith(old_exp): pathname = pathname.replace(old_exp, new_exp, 1) @@ -2866,15 +2878,15 @@ class RepoFilter(object): # in sync with the original with any changes, and then decides # they want to rewrite history to only have one of the two files) colliding_change = new_file_changes[change.filename] - if change.type == 'D': + if change.type == b'D': # We can just throw this one away and keep the other continue - elif change.type == 'M' and ( + elif change.type == b'M' and ( change.mode == colliding_change.mode and change.blob_id == colliding_change.blob_id): # The two are identical, so we can throw this one away and keep other continue - elif new_file_changes[change.filename].type != 'D': + elif new_file_changes[change.filename].type != b'D': raise SystemExit(_("File renaming caused colliding pathnames!\n") + _(" Commit: {}\n").format(commit.original_id) + _(" Filename: {}").format(change.filename)) @@ -2883,8 +2895,8 @@ class RepoFilter(object): @staticmethod def do_tag_rename(rename_pair, tagname): - old, new = rename_pair.split(':', 1) - old, new = 'refs/tags/'+old, 'refs/tags/'+new + old, new = rename_pair.split(b':', 1) + old, new = b'refs/tags/'+old, b'refs/tags/'+new if tagname.startswith(old): return tagname.replace(old, new, 1) return tagname @@ -2895,7 +2907,7 @@ class RepoFilter(object): tag.message = self._message_callback(tag.message) # Tweak the tag name according to callbacks - tag_prefix = 'refs/tags/' + tag_prefix = b'refs/tags/' fullref = tag_prefix+tag.ref if self._args.tag_rename: fullref = RepoFilter.do_tag_rename(self._args.tag_rename, fullref) @@ -2923,9 +2935,9 @@ class RepoFilter(object): reset.ref = self._refname_callback(reset.ref) def results_tmp_dir(self, create_if_missing=True): - working_dir = self._args.target or self._args.source or '.' 
+ working_dir = self._args.target or self._args.source or b'.' git_dir = GitUtils.determine_git_dir(working_dir) - d = os.path.join(git_dir, 'filter-repo') + d = os.path.join(git_dir, b'filter-repo') if create_if_missing and not os.path.isdir(d): os.mkdir(d) return d @@ -2970,7 +2982,7 @@ class RepoFilter(object): self._input = self._fep.stdout if self._args.dry_run or self._args.debug: self._fe_orig = os.path.join(self.results_tmp_dir(), - 'fast-export.original') + b'fast-export.original') output = open(self._fe_orig, 'bw') self._input = InputFileBackup(self._input, output) if self._args.debug: @@ -2989,7 +3001,7 @@ class RepoFilter(object): self._import_pipes = (self._fip.stdin, self._fip.stdout) if self._args.dry_run or self._args.debug: self._fe_filt = os.path.join(self.results_tmp_dir(), - 'fast-export.filtered') + b'fast-export.filtered') self._output = open(self._fe_filt, 'bw') else: self._output = self._fip.stdin @@ -3003,7 +3015,7 @@ class RepoFilter(object): if self._args.dry_run: return refs_to_migrate = set(x for x in self._orig_refs - if x.startswith('refs/remotes/origin/')) + if x.startswith(b'refs/remotes/origin/')) if not refs_to_migrate: return if self._args.debug: @@ -3013,14 +3025,14 @@ class RepoFilter(object): stdin=subprocess.PIPE, cwd=target_working_dir) for ref in refs_to_migrate: - if ref == 'refs/remotes/origin/HEAD': - p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref])) + if ref == b'refs/remotes/origin/HEAD': + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) del self._orig_refs[ref] continue - newref = ref.replace('refs/remotes/origin/', 'refs/heads/') + newref = ref.replace(b'refs/remotes/origin/', b'refs/heads/') if newref not in self._orig_refs: - p.stdin.write('create {} {}\n'.format(newref, self._orig_refs[ref])) - p.stdin.write('delete {} {}\n'.format(ref, self._orig_refs[ref])) + p.stdin.write(b'create %s %s\n' % (newref, self._orig_refs[ref])) + p.stdin.write(b'delete %s %s\n' % (ref, self._orig_refs[ref])) self._orig_refs[newref] = self._orig_refs[ref] del self._orig_refs[ref] p.stdin.close() @@ -3115,11 +3127,11 @@ class RepoFilter(object): if refs_to_nuke: if self._args.debug: print("[DEBUG] Deleting the following refs:\n "+ - decode("\n ".join(refs_to_nuke))) + decode(b"\n ".join(refs_to_nuke))) p = subprocess.Popen('git update-ref --stdin'.split(), stdin=subprocess.PIPE, cwd=target_working_dir) - p.stdin.write(''.join(["option no-deref\ndelete {}\n".format(x) + p.stdin.write(b''.join([b"option no-deref\ndelete %s\n" % x for x in refs_to_nuke])) p.stdin.close() if p.wait(): diff --git a/t/t9390-filter-repo.sh b/t/t9390-filter-repo.sh index 52221d1..8a674ab 100755 --- a/t/t9390-filter-repo.sh +++ b/t/t9390-filter-repo.sh @@ -450,15 +450,15 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' ' head -n 9 README >actual && test_cmp expect actual && - cat | tr Q "\047" >expect <<-\EOF && + cat >expect <<-\EOF && === Files by sha and associated pathnames in reverse size === Format: sha, unpacked size, packed size, filename(s) object stored as a89c82a2d4b713a125a4323d25adda062cc0013d 44 48 numbers/medium.num f00c965d8307308469e537302baa73048488f162 21 37 numbers/small.num 2aa69a2a708eed00cb390e30f6bcc3eed773f390 20 36 whatever - 51b95456de9274c9a95f756742808dfd480b9b35 13 29 [QcapriciousQ, QfickleQ, QmercurialQ] - 732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [Qsequence/knowQ, Qwords/knowQ] - 34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [Qsequence/toQ, Qwords/toQ] + 51b95456de9274c9a95f756742808dfd480b9b35 13 29 [capricious, 
fickle, mercurial] + 732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [sequence/know, words/know] + 34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [sequence/to, words/to] 7ecb56eb3fa3fa6f19dd48bca9f971950b119ede 3 18 words/know EOF test_cmp expect blob-shas-and-paths.txt && @@ -795,7 +795,7 @@ test_expect_success 'incremental import' ' original=$(git rev-parse master) && git fast-export --reference-excluded-parents master~2..master \ - | git filter-repo --stdin --refname-callback "return \"develop\"" && + | git filter-repo --stdin --refname-callback "return b\"develop\"" && test "$(git rev-parse develop)" = "$original" ) ' diff --git a/t/t9391/commit_info.py b/t/t9391/commit_info.py index a0d34f3..01fd725 100755 --- a/t/t9391/commit_info.py +++ b/t/t9391/commit_info.py @@ -13,12 +13,12 @@ import git_filter_repo as fr def change_up_them_commits(commit): # Change the commit author - if commit.author_name == "Copy N. Paste": - commit.author_name = "Ima L. Oser" - commit.author_email = "aloser@my.corp" + if commit.author_name == b"Copy N. Paste": + commit.author_name = b"Ima L. Oser" + commit.author_email = b"aloser@my.corp" # Fix the author email - commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email) + commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email) # Fix the committer date (bad timezone conversion in initial import) oldtime = fr.string_to_date(commit.committer_date) @@ -26,7 +26,7 @@ def change_up_them_commits(commit): commit.committer_date = fr.date_to_string(newtime) # Fix the commit message - commit.message = re.sub("Marketing is staffed with pansies", "", + commit.message = re.sub(b"Marketing is staffed with pansies", b"", commit.message) args = fr.FilteringOptions.parse_args(['--force']) diff --git a/t/t9391/create_fast_export_output.py b/t/t9391/create_fast_export_output.py index e2ef13c..1eb0a3d 100755 --- a/t/t9391/create_fast_export_output.py +++ b/t/t9391/create_fast_export_output.py @@ -23,82 +23,82 @@ out.importer_only() output = out._output -world = Blob("Hello") +world = Blob(b"Hello") world.dump(output) -bar = Blob("foo\n") +bar = Blob(b"foo\n") bar.dump(output) -master = Reset("refs/heads/master") +master = Reset(b"refs/heads/master") master.dump(output) -changes = [FileChanges('M', 'world', world.id, mode="100644"), - FileChanges('M', 'bar', bar.id, mode="100644")] +changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"), + FileChanges(b'M', b'bar', bar.id, mode=b"100644")] when = datetime(year=2005, month=4, day=7, hour=15, minute=16, second=10, - tzinfo=FixedTimeZone("-0700")) + tzinfo=FixedTimeZone(b"-0700")) when_string = fr.date_to_string(when) -commit1 = Commit("refs/heads/master", - "A U Thor", "au@thor.email", when_string, - "Com M. Iter", "comm@iter.email", when_string, - "My first commit! Wooot!\n\nLonger description", +commit1 = Commit(b"refs/heads/master", + b"A U Thor", b"au@thor.email", when_string, + b"Com M. Iter", b"comm@iter.email", when_string, + b"My first commit! 
Wooot!\n\nLonger description", changes, parents = []) commit1.dump(output) -world = Blob("Hello\nHi") +world = Blob(b"Hello\nHi") world.dump(output) -world_link = Blob("world") +world_link = Blob(b"world") world_link.dump(output) -changes = [FileChanges('M', 'world', world.id, mode="100644"), - FileChanges('M', 'planet', world_link.id, mode="120000")] +changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"), + FileChanges(b'M', b'planet', world_link.id, mode=b"120000")] when += timedelta(days=3, hours=4, minutes=6) when_string = fr.date_to_string(when) -commit2 = Commit("refs/heads/master", - "A U Thor", "au@thor.email", when_string, - "Com M. Iter", "comm@iter.email", when_string, - "Make a symlink to world called planet, modify world", +commit2 = Commit(b"refs/heads/master", + b"A U Thor", b"au@thor.email", when_string, + b"Com M. Iter", b"comm@iter.email", when_string, + b"Make a symlink to world called planet, modify world", changes, parents = [commit1.id]) commit2.dump(output) -script = Blob("#!/bin/sh\n\necho Hello") +script = Blob(b"#!/bin/sh\n\necho Hello") script.dump(output) -changes = [FileChanges('M', 'runme', script.id, mode="100755"), - FileChanges('D', 'bar')] -when_string = "1234567890 -0700" -commit3 = Commit("refs/heads/master", - "A U Thor", "au@thor.email", when_string, - "Com M. Iter", "comm@iter.email", when_string, - "Add runme script, remove bar", +changes = [FileChanges(b'M', b'runme', script.id, mode=b"100755"), + FileChanges(b'D', b'bar')] +when_string = b"1234567890 -0700" +commit3 = Commit(b"refs/heads/master", + b"A U Thor", b"au@thor.email", when_string, + b"Com M. Iter", b"comm@iter.email", when_string, + b"Add runme script, remove bar", changes, parents = [commit2.id]) commit3.dump(output) -progress = Progress("Done with the master branch now...") +progress = Progress(b"Done with the master branch now...") progress.dump(output) checkpoint = Checkpoint() checkpoint.dump(output) -devel = Reset("refs/heads/devel", commit1.id) +devel = Reset(b"refs/heads/devel", commit1.id) devel.dump(output) -world = Blob("Hello\nGoodbye") +world = Blob(b"Hello\nGoodbye") world.dump(output) -changes = [FileChanges('M', 'world', world.id, mode="100644")] -when = datetime(2006, 8, 17, tzinfo=FixedTimeZone("+0200")) +changes = [FileChanges(b'M', b'world', world.id, mode=b"100644")] +when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200")) when_string = fr.date_to_string(when) -commit4 = Commit("refs/heads/devel", - "A U Thor", "au@thor.email", when_string, - "Com M. Iter", "comm@iter.email", when_string, - "Modify world", +commit4 = Commit(b"refs/heads/devel", + b"A U Thor", b"au@thor.email", when_string, + b"Com M. Iter", b"comm@iter.email", when_string, + b"Modify world", changes, parents = [commit1.id]) commit4.dump(output) -world = Blob("Hello\nHi\nGoodbye") +world = Blob(b"Hello\nHi\nGoodbye") world.dump(output) when = fr.string_to_date(commit3.author_date) + timedelta(days=47) when_string = fr.date_to_string(when) @@ -106,22 +106,22 @@ when_string = fr.date_to_string(when) # to the first parent. Thus, despite the fact that runme and planet have # not changed and bar was not modified in the devel side, we have to list them # all anyway. 
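The merge commit's change list resumes below. As a stand-alone sanity check of the converted constructors, a minimal sketch (assuming git_filter_repo is importable, just as these fixtures assume; the blob content is a throwaway):

  import io
  import git_filter_repo as fr

  out = io.BytesIO()        # stands in for fast-import's binary stdin
  blob = fr.Blob(b"Hello")  # blob data must now be bytes, not str
  blob.dump(out)
  assert out.getvalue().startswith(b'blob\nmark :')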
-changes = [FileChanges('M', 'world', world.id, mode="100644"), - FileChanges('D', 'bar'), - FileChanges('M', 'runme', script.id, mode="100755"), - FileChanges('M', 'planet', world_link.id, mode="120000")] +changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"), + FileChanges(b'D', b'bar'), + FileChanges(b'M', b'runme', script.id, mode=b"100755"), + FileChanges(b'M', b'planet', world_link.id, mode=b"120000")] -commit5 = Commit("refs/heads/devel", - "A U Thor", "au@thor.email", when_string, - "Com M. Iter", "comm@iter.email", when_string, - "Merge branch 'master'\n", +commit5 = Commit(b"refs/heads/devel", + b"A U Thor", b"au@thor.email", when_string, + b"Com M. Iter", b"comm@iter.email", when_string, + b"Merge branch 'master'\n", changes, parents = [commit4.id, commit3.id]) commit5.dump(output) -mytag = Tag("refs/tags/v1.0", commit5.id, - "His R. Highness", "royalty@my.kingdom", when_string, - "I bequeath to my peons this royal software") +mytag = Tag(b"refs/tags/v1.0", commit5.id, + b"His R. Highness", b"royalty@my.kingdom", when_string, + b"I bequeath to my peons this royal software") mytag.dump(output) out.finish() diff --git a/t/t9391/file_filter.py b/t/t9391/file_filter.py index 8540b7d..c3683fc 100755 --- a/t/t9391/file_filter.py +++ b/t/t9391/file_filter.py @@ -15,14 +15,14 @@ import sys import git_filter_repo as fr def drop_file_by_contents(blob): - bad_file_contents = 'The launch code is 1-2-3-4.' + bad_file_contents = b'The launch code is 1-2-3-4.' if blob.data == bad_file_contents: blob.skip() def drop_files_by_name(commit): new_file_changes = [] for change in commit.file_changes: - if not change.filename.endswith('.doc'): + if not change.filename.endswith(b'.doc'): new_file_changes.append(change) commit.file_changes = new_file_changes diff --git a/t/t9391/rename-master-to-develop.py b/t/t9391/rename-master-to-develop.py index 7a922d0..1acfef8 100755 --- a/t/t9391/rename-master-to-develop.py +++ b/t/t9391/rename-master-to-develop.py @@ -14,8 +14,8 @@ not try to handle any such special cases. 
import git_filter_repo as fr def my_commit_callback(commit): - if commit.branch == "refs/heads/master": - commit.branch = "refs/heads/develop" + if commit.branch == b"refs/heads/master": + commit.branch = b"refs/heads/develop" args = fr.FilteringOptions.default_options() args.force = True diff --git a/t/t9391/splice_repos.py b/t/t9391/splice_repos.py index 133044e..5993436 100755 --- a/t/t9391/splice_repos.py +++ b/t/t9391/splice_repos.py @@ -29,11 +29,11 @@ class InterleaveRepositories: def hold_commit(self, commit): commit.skip(new_id = commit.id) - letter = re.match('Commit (.)', commit.message).group(1) + letter = re.match(b'Commit (.)', commit.message).group(1) self.commit_map[letter] = commit def weave_commit(self, commit): - letter = re.match('Commit (.)', commit.message).group(1) + letter = re.match(b'Commit (.)', commit.message).group(1) prev_letter = bytes([ord(letter)-1]) # Splice in any extra commits needed @@ -53,10 +53,10 @@ class InterleaveRepositories: fr.record_id_rename(new_commit.id, commit.id) def run(self): - blob = fr.Blob('public gpg key contents') - tag = fr.Tag('gpg-pubkey', blob.id, - 'Ima Tagger', 'ima@tagg.er', '1136199845 +0300', - 'Very important explanation and stuff') + blob = fr.Blob(b'public gpg key contents') + tag = fr.Tag(b'gpg-pubkey', blob.id, + b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300', + b'Very important explanation and stuff') args = fr.FilteringOptions.parse_args(['--target', self.output_dir]) out = fr.RepoFilter(args) diff --git a/t/t9391/strip-cvs-keywords.py b/t/t9391/strip-cvs-keywords.py index ccd3c8d..ae7cda0 100755 --- a/t/t9391/strip-cvs-keywords.py +++ b/t/t9391/strip-cvs-keywords.py @@ -18,8 +18,8 @@ def strip_cvs_keywords(blob): # FIXME: Should first check if blob is a text file to avoid ruining # binaries. Could use python.magic here, or just output blob.data to # the unix 'file' command - pattern = r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$' - replacement = r'$\1$' + pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$' + replacement = br'$\1$' blob.data = re.sub(pattern, replacement, blob.data) args = fr.FilteringOptions.parse_args(['--force']) diff --git a/t/t9391/unusual.py b/t/t9391/unusual.py index da0cf89..190f82b 100755 --- a/t/t9391/unusual.py +++ b/t/t9391/unusual.py @@ -21,7 +21,7 @@ import textwrap import git_filter_repo as fr def handle_progress(progress): - print("Decipher this: "+bytes(reversed(progress.message))) + print(b"Decipher this: "+bytes(reversed(progress.message))) def handle_checkpoint(checkpoint_object): # Flip a coin; see if we want to pass the checkpoint through. @@ -44,8 +44,8 @@ def track_everything(obj): # projects, I'm just verifying an invariant of the current code. assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1] -mystr = 'This is the contents of the blob' -compare = "Blob:\n blob\n mark :1\n data {}\n {}".format(len(mystr), mystr) +mystr = b'This is the contents of the blob' +compare = b"Blob:\n blob\n mark :1\n data %d\n %s" % (len(mystr), mystr) # Next line's only purpose is testing code coverage of something that helps # debugging git-filter-repo; it is NOT something external folks should depend # upon. 
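Before the final unusual.py hunk, one Python 3 subtlety from the splice_repos change above deserves a note: indexing or iterating bytes yields ints rather than length-1 strings, which is why the preceding letter is rebuilt with bytes([...]). An illustrative sketch:

  import re

  letter = re.match(b'Commit (.)', b'Commit B').group(1)  # b'B'
  assert letter[0] == 66                  # indexing bytes yields an int
  prev_letter = bytes([ord(letter) - 1])  # back to a one-byte value
  assert prev_letter == b'A'

ord() accepts a length-1 bytes object as well as a length-1 str, so the ord(letter) - 1 arithmetic carries over from Python 2 unchanged.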
@@ -102,14 +102,14 @@ stream = io.BytesIO(textwrap.dedent(''' from :3 M 100644 :1 salutation - '''[1:])) + '''[1:]).encode()) counts = collections.Counter() def look_for_reset(obj): print("Processing {}".format(obj)) counts[type(obj)] += 1 if type(obj) == fr.Reset: - assert obj.ref == 'refs/heads/B' + assert obj.ref == b'refs/heads/B' # Use all kinds of internals that external scripts should NOT use and which # are likely to break in the future, just to verify a few invariants... diff --git a/t/t9392-python-callback.sh b/t/t9392-python-callback.sh index 983879e..27c338c 100755 --- a/t/t9392-python-callback.sh +++ b/t/t9392-python-callback.sh @@ -51,7 +51,7 @@ test_expect_success '--filename-callback' ' setup filename-callback && ( cd filename-callback && - git filter-repo --filename-callback "return None if filename.endswith(\".doc\") else \"src/\"+filename" && + git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" && git log --format=%n --name-only | sort | uniq | grep -v ^$ > f && ! grep file.doc f && COMPARE=$(wc -l log-messages && grep TLDR:...... log-messages >modified-messages && test_line_count = 6 modified-messages @@ -75,7 +75,7 @@ test_expect_success '--name-callback' ' setup name-callback && ( cd name-callback && - git filter-repo --name-callback "return name.replace(\"N.\", \"And\")" && + git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" && git log --format=%an >log-person-names && grep Copy.And.Paste log-person-names ) @@ -85,7 +85,7 @@ test_expect_success '--email-callback' ' setup email-callback && ( cd email-callback && - git filter-repo --email-callback "return email.replace(\".com\", \".org\")" && + git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" && git log --format=%ae%n%ce >log-emails && ! grep .com log-emails && grep .org log-emails @@ -98,7 +98,7 @@ test_expect_success '--refname-callback' ' cd refname-callback && git filter-repo --refname-callback " dir,path = os.path.split(refname) - return dir+\"/prefix-\"+path" && + return dir+b\"/prefix-\"+path" && git show-ref | grep refs/heads/prefix-master && git show-ref | grep refs/tags/prefix-v1.0 && git show-ref | grep refs/tags/prefix-v2.0 @@ -110,7 +110,7 @@ test_expect_success '--refname-callback sanity check' ' ( cd refname-sanity-check && - test_must_fail git filter-repo --refname-callback "return re.sub(\"tags\", \"other-tags\", refname)" 2>../err && + test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err && test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err && rm ../err ) @@ -138,7 +138,7 @@ test_expect_success '--commit-callback' ' commit.committer_email = commit.author_email commit.committer_date = commit.author_date for change in commit.file_changes: - change.mode = \"100755\" + change.mode = b\"100755\" " && git log --format=%ae%n%ce >log-emails && ! grep committer@example.com log-emails && @@ -153,8 +153,8 @@ test_expect_success '--tag-callback' ' ( cd tag-callback && git filter-repo --tag-callback " - tag.tagger_name = \"Dr. \"+tag.tagger_name - tag.message = \"Awesome sauce \"+tag.message + tag.tagger_name = b\"Dr. \"+tag.tagger_name + tag.message = b\"Awesome sauce \"+tag.message " && git cat-file -p v2.0 | grep ^tagger.Dr\\. 
&& git cat-file -p v2.0 | grep ^Awesome.sauce.Super @@ -175,7 +175,7 @@ test_expect_success 'callback has return statement sanity check' ' ( cd callback_return_sanity && - test_must_fail git filter-repo --filename-callback "filename + \".txt\"" 2>../err&& + test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&& test_i18ngrep "Error: --filename-callback should have a return statement" ../err && rm ../err )
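Taken together, these callback tests show the one user-visible consequence of the port: callback bodies passed on the command line now operate on bytes, so their literals need a b prefix. Written as a library callback rather than a shell snippet, the filter in the first test above is equivalent to this hypothetical function (a sketch, not part of the test suite):

  def filename_callback(filename):
    # filter-repo hands callbacks raw path bytes; returning None drops the file
    return None if filename.endswith(b".doc") else b"src/" + filename

  assert filename_callback(b"file.doc") is None
  assert filename_callback(b"README") == b"src/README"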