filter-repo (python3): replace strings with bytestrings

This is by far the largest python3 change; it consists basically of
  * using b'<str>' instead of '<str>' in lots of places
  * adding a .encode() if we really do work with a string but need to
    get it converted to a bytestring
  * replacing uses of .format() with interpolation via the '%' operator,
    since bytestrings don't have a .format() method.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-04-27 15:18:59 -07:00
parent 385b0586ca
commit 35052f673d
10 changed files with 408 additions and 396 deletions

File diff suppressed because it is too large Load Diff

View File

@ -450,15 +450,15 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' '
head -n 9 README >actual && head -n 9 README >actual &&
test_cmp expect actual && test_cmp expect actual &&
cat | tr Q "\047" >expect <<-\EOF && cat >expect <<-\EOF &&
=== Files by sha and associated pathnames in reverse size === === Files by sha and associated pathnames in reverse size ===
Format: sha, unpacked size, packed size, filename(s) object stored as Format: sha, unpacked size, packed size, filename(s) object stored as
a89c82a2d4b713a125a4323d25adda062cc0013d 44 48 numbers/medium.num a89c82a2d4b713a125a4323d25adda062cc0013d 44 48 numbers/medium.num
f00c965d8307308469e537302baa73048488f162 21 37 numbers/small.num f00c965d8307308469e537302baa73048488f162 21 37 numbers/small.num
2aa69a2a708eed00cb390e30f6bcc3eed773f390 20 36 whatever 2aa69a2a708eed00cb390e30f6bcc3eed773f390 20 36 whatever
51b95456de9274c9a95f756742808dfd480b9b35 13 29 [QcapriciousQ, QfickleQ, QmercurialQ] 51b95456de9274c9a95f756742808dfd480b9b35 13 29 [capricious, fickle, mercurial]
732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [Qsequence/knowQ, Qwords/knowQ] 732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [sequence/know, words/know]
34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [Qsequence/toQ, Qwords/toQ] 34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [sequence/to, words/to]
7ecb56eb3fa3fa6f19dd48bca9f971950b119ede 3 18 words/know 7ecb56eb3fa3fa6f19dd48bca9f971950b119ede 3 18 words/know
EOF EOF
test_cmp expect blob-shas-and-paths.txt && test_cmp expect blob-shas-and-paths.txt &&
@ -795,7 +795,7 @@ test_expect_success 'incremental import' '
original=$(git rev-parse master) && original=$(git rev-parse master) &&
git fast-export --reference-excluded-parents master~2..master \ git fast-export --reference-excluded-parents master~2..master \
| git filter-repo --stdin --refname-callback "return \"develop\"" && | git filter-repo --stdin --refname-callback "return b\"develop\"" &&
test "$(git rev-parse develop)" = "$original" test "$(git rev-parse develop)" = "$original"
) )
' '

View File

@ -13,12 +13,12 @@ import git_filter_repo as fr
def change_up_them_commits(commit): def change_up_them_commits(commit):
# Change the commit author # Change the commit author
if commit.author_name == "Copy N. Paste": if commit.author_name == b"Copy N. Paste":
commit.author_name = "Ima L. Oser" commit.author_name = b"Ima L. Oser"
commit.author_email = "aloser@my.corp" commit.author_email = b"aloser@my.corp"
# Fix the author email # Fix the author email
commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email) commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email)
# Fix the committer date (bad timezone conversion in initial import) # Fix the committer date (bad timezone conversion in initial import)
oldtime = fr.string_to_date(commit.committer_date) oldtime = fr.string_to_date(commit.committer_date)
@ -26,7 +26,7 @@ def change_up_them_commits(commit):
commit.committer_date = fr.date_to_string(newtime) commit.committer_date = fr.date_to_string(newtime)
# Fix the commit message # Fix the commit message
commit.message = re.sub("Marketing is staffed with pansies", "", commit.message = re.sub(b"Marketing is staffed with pansies", b"",
commit.message) commit.message)
args = fr.FilteringOptions.parse_args(['--force']) args = fr.FilteringOptions.parse_args(['--force'])

View File

@ -23,82 +23,82 @@ out.importer_only()
output = out._output output = out._output
world = Blob("Hello") world = Blob(b"Hello")
world.dump(output) world.dump(output)
bar = Blob("foo\n") bar = Blob(b"foo\n")
bar.dump(output) bar.dump(output)
master = Reset("refs/heads/master") master = Reset(b"refs/heads/master")
master.dump(output) master.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644"), changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges('M', 'bar', bar.id, mode="100644")] FileChanges(b'M', b'bar', bar.id, mode=b"100644")]
when = datetime(year=2005, month=4, day=7, when = datetime(year=2005, month=4, day=7,
hour=15, minute=16, second=10, hour=15, minute=16, second=10,
tzinfo=FixedTimeZone("-0700")) tzinfo=FixedTimeZone(b"-0700"))
when_string = fr.date_to_string(when) when_string = fr.date_to_string(when)
commit1 = Commit("refs/heads/master", commit1 = Commit(b"refs/heads/master",
"A U Thor", "au@thor.email", when_string, b"A U Thor", b"au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string, b"Com M. Iter", b"comm@iter.email", when_string,
"My first commit! Wooot!\n\nLonger description", b"My first commit! Wooot!\n\nLonger description",
changes, changes,
parents = []) parents = [])
commit1.dump(output) commit1.dump(output)
world = Blob("Hello\nHi") world = Blob(b"Hello\nHi")
world.dump(output) world.dump(output)
world_link = Blob("world") world_link = Blob(b"world")
world_link.dump(output) world_link.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644"), changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges('M', 'planet', world_link.id, mode="120000")] FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
when += timedelta(days=3, hours=4, minutes=6) when += timedelta(days=3, hours=4, minutes=6)
when_string = fr.date_to_string(when) when_string = fr.date_to_string(when)
commit2 = Commit("refs/heads/master", commit2 = Commit(b"refs/heads/master",
"A U Thor", "au@thor.email", when_string, b"A U Thor", b"au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string, b"Com M. Iter", b"comm@iter.email", when_string,
"Make a symlink to world called planet, modify world", b"Make a symlink to world called planet, modify world",
changes, changes,
parents = [commit1.id]) parents = [commit1.id])
commit2.dump(output) commit2.dump(output)
script = Blob("#!/bin/sh\n\necho Hello") script = Blob(b"#!/bin/sh\n\necho Hello")
script.dump(output) script.dump(output)
changes = [FileChanges('M', 'runme', script.id, mode="100755"), changes = [FileChanges(b'M', b'runme', script.id, mode=b"100755"),
FileChanges('D', 'bar')] FileChanges(b'D', b'bar')]
when_string = "1234567890 -0700" when_string = b"1234567890 -0700"
commit3 = Commit("refs/heads/master", commit3 = Commit(b"refs/heads/master",
"A U Thor", "au@thor.email", when_string, b"A U Thor", b"au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string, b"Com M. Iter", b"comm@iter.email", when_string,
"Add runme script, remove bar", b"Add runme script, remove bar",
changes, changes,
parents = [commit2.id]) parents = [commit2.id])
commit3.dump(output) commit3.dump(output)
progress = Progress("Done with the master branch now...") progress = Progress(b"Done with the master branch now...")
progress.dump(output) progress.dump(output)
checkpoint = Checkpoint() checkpoint = Checkpoint()
checkpoint.dump(output) checkpoint.dump(output)
devel = Reset("refs/heads/devel", commit1.id) devel = Reset(b"refs/heads/devel", commit1.id)
devel.dump(output) devel.dump(output)
world = Blob("Hello\nGoodbye") world = Blob(b"Hello\nGoodbye")
world.dump(output) world.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644")] changes = [FileChanges(b'M', b'world', world.id, mode=b"100644")]
when = datetime(2006, 8, 17, tzinfo=FixedTimeZone("+0200")) when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200"))
when_string = fr.date_to_string(when) when_string = fr.date_to_string(when)
commit4 = Commit("refs/heads/devel", commit4 = Commit(b"refs/heads/devel",
"A U Thor", "au@thor.email", when_string, b"A U Thor", b"au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string, b"Com M. Iter", b"comm@iter.email", when_string,
"Modify world", b"Modify world",
changes, changes,
parents = [commit1.id]) parents = [commit1.id])
commit4.dump(output) commit4.dump(output)
world = Blob("Hello\nHi\nGoodbye") world = Blob(b"Hello\nHi\nGoodbye")
world.dump(output) world.dump(output)
when = fr.string_to_date(commit3.author_date) + timedelta(days=47) when = fr.string_to_date(commit3.author_date) + timedelta(days=47)
when_string = fr.date_to_string(when) when_string = fr.date_to_string(when)
@ -106,22 +106,22 @@ when_string = fr.date_to_string(when)
# to the first parent. Thus, despite the fact that runme and planet have # to the first parent. Thus, despite the fact that runme and planet have
# not changed and bar was not modified in the devel side, we have to list them # not changed and bar was not modified in the devel side, we have to list them
# all anyway. # all anyway.
changes = [FileChanges('M', 'world', world.id, mode="100644"), changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges('D', 'bar'), FileChanges(b'D', b'bar'),
FileChanges('M', 'runme', script.id, mode="100755"), FileChanges(b'M', b'runme', script.id, mode=b"100755"),
FileChanges('M', 'planet', world_link.id, mode="120000")] FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
commit5 = Commit("refs/heads/devel", commit5 = Commit(b"refs/heads/devel",
"A U Thor", "au@thor.email", when_string, b"A U Thor", b"au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string, b"Com M. Iter", b"comm@iter.email", when_string,
"Merge branch 'master'\n", b"Merge branch 'master'\n",
changes, changes,
parents = [commit4.id, commit3.id]) parents = [commit4.id, commit3.id])
commit5.dump(output) commit5.dump(output)
mytag = Tag("refs/tags/v1.0", commit5.id, mytag = Tag(b"refs/tags/v1.0", commit5.id,
"His R. Highness", "royalty@my.kingdom", when_string, b"His R. Highness", b"royalty@my.kingdom", when_string,
"I bequeath to my peons this royal software") b"I bequeath to my peons this royal software")
mytag.dump(output) mytag.dump(output)
out.finish() out.finish()

View File

@ -15,14 +15,14 @@ import sys
import git_filter_repo as fr import git_filter_repo as fr
def drop_file_by_contents(blob): def drop_file_by_contents(blob):
bad_file_contents = 'The launch code is 1-2-3-4.' bad_file_contents = b'The launch code is 1-2-3-4.'
if blob.data == bad_file_contents: if blob.data == bad_file_contents:
blob.skip() blob.skip()
def drop_files_by_name(commit): def drop_files_by_name(commit):
new_file_changes = [] new_file_changes = []
for change in commit.file_changes: for change in commit.file_changes:
if not change.filename.endswith('.doc'): if not change.filename.endswith(b'.doc'):
new_file_changes.append(change) new_file_changes.append(change)
commit.file_changes = new_file_changes commit.file_changes = new_file_changes

View File

@ -14,8 +14,8 @@ not try to handle any such special cases.
import git_filter_repo as fr import git_filter_repo as fr
def my_commit_callback(commit): def my_commit_callback(commit):
if commit.branch == "refs/heads/master": if commit.branch == b"refs/heads/master":
commit.branch = "refs/heads/develop" commit.branch = b"refs/heads/develop"
args = fr.FilteringOptions.default_options() args = fr.FilteringOptions.default_options()
args.force = True args.force = True

View File

@ -29,11 +29,11 @@ class InterleaveRepositories:
def hold_commit(self, commit): def hold_commit(self, commit):
commit.skip(new_id = commit.id) commit.skip(new_id = commit.id)
letter = re.match('Commit (.)', commit.message).group(1) letter = re.match(b'Commit (.)', commit.message).group(1)
self.commit_map[letter] = commit self.commit_map[letter] = commit
def weave_commit(self, commit): def weave_commit(self, commit):
letter = re.match('Commit (.)', commit.message).group(1) letter = re.match(b'Commit (.)', commit.message).group(1)
prev_letter = bytes([ord(letter)-1]) prev_letter = bytes([ord(letter)-1])
# Splice in any extra commits needed # Splice in any extra commits needed
@ -53,10 +53,10 @@ class InterleaveRepositories:
fr.record_id_rename(new_commit.id, commit.id) fr.record_id_rename(new_commit.id, commit.id)
def run(self): def run(self):
blob = fr.Blob('public gpg key contents') blob = fr.Blob(b'public gpg key contents')
tag = fr.Tag('gpg-pubkey', blob.id, tag = fr.Tag(b'gpg-pubkey', blob.id,
'Ima Tagger', 'ima@tagg.er', '1136199845 +0300', b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300',
'Very important explanation and stuff') b'Very important explanation and stuff')
args = fr.FilteringOptions.parse_args(['--target', self.output_dir]) args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
out = fr.RepoFilter(args) out = fr.RepoFilter(args)

View File

@ -18,8 +18,8 @@ def strip_cvs_keywords(blob):
# FIXME: Should first check if blob is a text file to avoid ruining # FIXME: Should first check if blob is a text file to avoid ruining
# binaries. Could use python.magic here, or just output blob.data to # binaries. Could use python.magic here, or just output blob.data to
# the unix 'file' command # the unix 'file' command
pattern = r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$' pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
replacement = r'$\1$' replacement = br'$\1$'
blob.data = re.sub(pattern, replacement, blob.data) blob.data = re.sub(pattern, replacement, blob.data)
args = fr.FilteringOptions.parse_args(['--force']) args = fr.FilteringOptions.parse_args(['--force'])

View File

@ -21,7 +21,7 @@ import textwrap
import git_filter_repo as fr import git_filter_repo as fr
def handle_progress(progress): def handle_progress(progress):
print("Decipher this: "+bytes(reversed(progress.message))) print(b"Decipher this: "+bytes(reversed(progress.message)))
def handle_checkpoint(checkpoint_object): def handle_checkpoint(checkpoint_object):
# Flip a coin; see if we want to pass the checkpoint through. # Flip a coin; see if we want to pass the checkpoint through.
@ -44,8 +44,8 @@ def track_everything(obj):
# projects, I'm just verifying an invariant of the current code. # projects, I'm just verifying an invariant of the current code.
assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1] assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]
mystr = 'This is the contents of the blob' mystr = b'This is the contents of the blob'
compare = "Blob:\n blob\n mark :1\n data {}\n {}".format(len(mystr), mystr) compare = b"Blob:\n blob\n mark :1\n data %d\n %s" % (len(mystr), mystr)
# Next line's only purpose is testing code coverage of something that helps # Next line's only purpose is testing code coverage of something that helps
# debugging git-filter-repo; it is NOT something external folks should depend # debugging git-filter-repo; it is NOT something external folks should depend
# upon. # upon.
@ -102,14 +102,14 @@ stream = io.BytesIO(textwrap.dedent('''
from :3 from :3
M 100644 :1 salutation M 100644 :1 salutation
'''[1:])) '''[1:]).encode())
counts = collections.Counter() counts = collections.Counter()
def look_for_reset(obj): def look_for_reset(obj):
print("Processing {}".format(obj)) print("Processing {}".format(obj))
counts[type(obj)] += 1 counts[type(obj)] += 1
if type(obj) == fr.Reset: if type(obj) == fr.Reset:
assert obj.ref == 'refs/heads/B' assert obj.ref == b'refs/heads/B'
# Use all kinds of internals that external scripts should NOT use and which # Use all kinds of internals that external scripts should NOT use and which
# are likely to break in the future, just to verify a few invariants... # are likely to break in the future, just to verify a few invariants...

View File

@ -51,7 +51,7 @@ test_expect_success '--filename-callback' '
setup filename-callback && setup filename-callback &&
( (
cd filename-callback && cd filename-callback &&
git filter-repo --filename-callback "return None if filename.endswith(\".doc\") else \"src/\"+filename" && git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" &&
git log --format=%n --name-only | sort | uniq | grep -v ^$ > f && git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
! grep file.doc f && ! grep file.doc f &&
COMPARE=$(wc -l <f) && COMPARE=$(wc -l <f) &&
@ -64,7 +64,7 @@ test_expect_success '--message-callback' '
setup message-callback && setup message-callback &&
( (
cd message-callback && cd message-callback &&
git filter-repo --message-callback "return \"TLDR: \"+message[0:5]" && git filter-repo --message-callback "return b\"TLDR: \"+message[0:5]" &&
git log --format=%s >log-messages && git log --format=%s >log-messages &&
grep TLDR:...... log-messages >modified-messages && grep TLDR:...... log-messages >modified-messages &&
test_line_count = 6 modified-messages test_line_count = 6 modified-messages
@ -75,7 +75,7 @@ test_expect_success '--name-callback' '
setup name-callback && setup name-callback &&
( (
cd name-callback && cd name-callback &&
git filter-repo --name-callback "return name.replace(\"N.\", \"And\")" && git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" &&
git log --format=%an >log-person-names && git log --format=%an >log-person-names &&
grep Copy.And.Paste log-person-names grep Copy.And.Paste log-person-names
) )
@ -85,7 +85,7 @@ test_expect_success '--email-callback' '
setup email-callback && setup email-callback &&
( (
cd email-callback && cd email-callback &&
git filter-repo --email-callback "return email.replace(\".com\", \".org\")" && git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" &&
git log --format=%ae%n%ce >log-emails && git log --format=%ae%n%ce >log-emails &&
! grep .com log-emails && ! grep .com log-emails &&
grep .org log-emails grep .org log-emails
@ -98,7 +98,7 @@ test_expect_success '--refname-callback' '
cd refname-callback && cd refname-callback &&
git filter-repo --refname-callback " git filter-repo --refname-callback "
dir,path = os.path.split(refname) dir,path = os.path.split(refname)
return dir+\"/prefix-\"+path" && return dir+b\"/prefix-\"+path" &&
git show-ref | grep refs/heads/prefix-master && git show-ref | grep refs/heads/prefix-master &&
git show-ref | grep refs/tags/prefix-v1.0 && git show-ref | grep refs/tags/prefix-v1.0 &&
git show-ref | grep refs/tags/prefix-v2.0 git show-ref | grep refs/tags/prefix-v2.0
@ -110,7 +110,7 @@ test_expect_success '--refname-callback sanity check' '
( (
cd refname-sanity-check && cd refname-sanity-check &&
test_must_fail git filter-repo --refname-callback "return re.sub(\"tags\", \"other-tags\", refname)" 2>../err && test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err &&
test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err && test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err &&
rm ../err rm ../err
) )
@ -138,7 +138,7 @@ test_expect_success '--commit-callback' '
commit.committer_email = commit.author_email commit.committer_email = commit.author_email
commit.committer_date = commit.author_date commit.committer_date = commit.author_date
for change in commit.file_changes: for change in commit.file_changes:
change.mode = \"100755\" change.mode = b\"100755\"
" && " &&
git log --format=%ae%n%ce >log-emails && git log --format=%ae%n%ce >log-emails &&
! grep committer@example.com log-emails && ! grep committer@example.com log-emails &&
@ -153,8 +153,8 @@ test_expect_success '--tag-callback' '
( (
cd tag-callback && cd tag-callback &&
git filter-repo --tag-callback " git filter-repo --tag-callback "
tag.tagger_name = \"Dr. \"+tag.tagger_name tag.tagger_name = b\"Dr. \"+tag.tagger_name
tag.message = \"Awesome sauce \"+tag.message tag.message = b\"Awesome sauce \"+tag.message
" && " &&
git cat-file -p v2.0 | grep ^tagger.Dr\\. && git cat-file -p v2.0 | grep ^tagger.Dr\\. &&
git cat-file -p v2.0 | grep ^Awesome.sauce.Super git cat-file -p v2.0 | grep ^Awesome.sauce.Super
@ -175,7 +175,7 @@ test_expect_success 'callback has return statement sanity check' '
( (
cd callback_return_sanity && cd callback_return_sanity &&
test_must_fail git filter-repo --filename-callback "filename + \".txt\"" 2>../err&& test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&&
test_i18ngrep "Error: --filename-callback should have a return statement" ../err && test_i18ngrep "Error: --filename-callback should have a return statement" ../err &&
rm ../err rm ../err
) )