filter-repo (python3): replace strings with bytestrings

This is by far the largest python3 change; it consists basically of
  * using b'<str>' instead of '<str>' in lots of places
  * adding a .encode() if we really do work with a string but need to
    get it converted to a bytestring
  * replacing uses of .format() with interpolation via the '%' operator,
    since bytestrings don't have a .format() method.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-04-27 15:18:59 -07:00
parent 385b0586ca
commit 35052f673d
10 changed files with 408 additions and 396 deletions

File diff suppressed because it is too large Load Diff

View File

@ -450,15 +450,15 @@ test_expect_success C_LOCALE_OUTPUT '--analyze' '
head -n 9 README >actual &&
test_cmp expect actual &&
cat | tr Q "\047" >expect <<-\EOF &&
cat >expect <<-\EOF &&
=== Files by sha and associated pathnames in reverse size ===
Format: sha, unpacked size, packed size, filename(s) object stored as
a89c82a2d4b713a125a4323d25adda062cc0013d 44 48 numbers/medium.num
f00c965d8307308469e537302baa73048488f162 21 37 numbers/small.num
2aa69a2a708eed00cb390e30f6bcc3eed773f390 20 36 whatever
51b95456de9274c9a95f756742808dfd480b9b35 13 29 [QcapriciousQ, QfickleQ, QmercurialQ]
732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [Qsequence/knowQ, Qwords/knowQ]
34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [Qsequence/toQ, Qwords/toQ]
51b95456de9274c9a95f756742808dfd480b9b35 13 29 [capricious, fickle, mercurial]
732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0 5 20 [sequence/know, words/know]
34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373 5 20 [sequence/to, words/to]
7ecb56eb3fa3fa6f19dd48bca9f971950b119ede 3 18 words/know
EOF
test_cmp expect blob-shas-and-paths.txt &&
@ -795,7 +795,7 @@ test_expect_success 'incremental import' '
original=$(git rev-parse master) &&
git fast-export --reference-excluded-parents master~2..master \
| git filter-repo --stdin --refname-callback "return \"develop\"" &&
| git filter-repo --stdin --refname-callback "return b\"develop\"" &&
test "$(git rev-parse develop)" = "$original"
)
'

View File

@ -13,12 +13,12 @@ import git_filter_repo as fr
def change_up_them_commits(commit):
# Change the commit author
if commit.author_name == "Copy N. Paste":
commit.author_name = "Ima L. Oser"
commit.author_email = "aloser@my.corp"
if commit.author_name == b"Copy N. Paste":
commit.author_name = b"Ima L. Oser"
commit.author_email = b"aloser@my.corp"
# Fix the author email
commit.author_email = re.sub("@my.crp", "@my.corp", commit.author_email)
commit.author_email = re.sub(b"@my.crp", b"@my.corp", commit.author_email)
# Fix the committer date (bad timezone conversion in initial import)
oldtime = fr.string_to_date(commit.committer_date)
@ -26,7 +26,7 @@ def change_up_them_commits(commit):
commit.committer_date = fr.date_to_string(newtime)
# Fix the commit message
commit.message = re.sub("Marketing is staffed with pansies", "",
commit.message = re.sub(b"Marketing is staffed with pansies", b"",
commit.message)
args = fr.FilteringOptions.parse_args(['--force'])

View File

@ -23,82 +23,82 @@ out.importer_only()
output = out._output
world = Blob("Hello")
world = Blob(b"Hello")
world.dump(output)
bar = Blob("foo\n")
bar = Blob(b"foo\n")
bar.dump(output)
master = Reset("refs/heads/master")
master = Reset(b"refs/heads/master")
master.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644"),
FileChanges('M', 'bar', bar.id, mode="100644")]
changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges(b'M', b'bar', bar.id, mode=b"100644")]
when = datetime(year=2005, month=4, day=7,
hour=15, minute=16, second=10,
tzinfo=FixedTimeZone("-0700"))
tzinfo=FixedTimeZone(b"-0700"))
when_string = fr.date_to_string(when)
commit1 = Commit("refs/heads/master",
"A U Thor", "au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string,
"My first commit! Wooot!\n\nLonger description",
commit1 = Commit(b"refs/heads/master",
b"A U Thor", b"au@thor.email", when_string,
b"Com M. Iter", b"comm@iter.email", when_string,
b"My first commit! Wooot!\n\nLonger description",
changes,
parents = [])
commit1.dump(output)
world = Blob("Hello\nHi")
world = Blob(b"Hello\nHi")
world.dump(output)
world_link = Blob("world")
world_link = Blob(b"world")
world_link.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644"),
FileChanges('M', 'planet', world_link.id, mode="120000")]
changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
when += timedelta(days=3, hours=4, minutes=6)
when_string = fr.date_to_string(when)
commit2 = Commit("refs/heads/master",
"A U Thor", "au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string,
"Make a symlink to world called planet, modify world",
commit2 = Commit(b"refs/heads/master",
b"A U Thor", b"au@thor.email", when_string,
b"Com M. Iter", b"comm@iter.email", when_string,
b"Make a symlink to world called planet, modify world",
changes,
parents = [commit1.id])
commit2.dump(output)
script = Blob("#!/bin/sh\n\necho Hello")
script = Blob(b"#!/bin/sh\n\necho Hello")
script.dump(output)
changes = [FileChanges('M', 'runme', script.id, mode="100755"),
FileChanges('D', 'bar')]
when_string = "1234567890 -0700"
commit3 = Commit("refs/heads/master",
"A U Thor", "au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string,
"Add runme script, remove bar",
changes = [FileChanges(b'M', b'runme', script.id, mode=b"100755"),
FileChanges(b'D', b'bar')]
when_string = b"1234567890 -0700"
commit3 = Commit(b"refs/heads/master",
b"A U Thor", b"au@thor.email", when_string,
b"Com M. Iter", b"comm@iter.email", when_string,
b"Add runme script, remove bar",
changes,
parents = [commit2.id])
commit3.dump(output)
progress = Progress("Done with the master branch now...")
progress = Progress(b"Done with the master branch now...")
progress.dump(output)
checkpoint = Checkpoint()
checkpoint.dump(output)
devel = Reset("refs/heads/devel", commit1.id)
devel = Reset(b"refs/heads/devel", commit1.id)
devel.dump(output)
world = Blob("Hello\nGoodbye")
world = Blob(b"Hello\nGoodbye")
world.dump(output)
changes = [FileChanges('M', 'world', world.id, mode="100644")]
when = datetime(2006, 8, 17, tzinfo=FixedTimeZone("+0200"))
changes = [FileChanges(b'M', b'world', world.id, mode=b"100644")]
when = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b"+0200"))
when_string = fr.date_to_string(when)
commit4 = Commit("refs/heads/devel",
"A U Thor", "au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string,
"Modify world",
commit4 = Commit(b"refs/heads/devel",
b"A U Thor", b"au@thor.email", when_string,
b"Com M. Iter", b"comm@iter.email", when_string,
b"Modify world",
changes,
parents = [commit1.id])
commit4.dump(output)
world = Blob("Hello\nHi\nGoodbye")
world = Blob(b"Hello\nHi\nGoodbye")
world.dump(output)
when = fr.string_to_date(commit3.author_date) + timedelta(days=47)
when_string = fr.date_to_string(when)
@ -106,22 +106,22 @@ when_string = fr.date_to_string(when)
# to the first parent. Thus, despite the fact that runme and planet have
# not changed and bar was not modified in the devel side, we have to list them
# all anyway.
changes = [FileChanges('M', 'world', world.id, mode="100644"),
FileChanges('D', 'bar'),
FileChanges('M', 'runme', script.id, mode="100755"),
FileChanges('M', 'planet', world_link.id, mode="120000")]
changes = [FileChanges(b'M', b'world', world.id, mode=b"100644"),
FileChanges(b'D', b'bar'),
FileChanges(b'M', b'runme', script.id, mode=b"100755"),
FileChanges(b'M', b'planet', world_link.id, mode=b"120000")]
commit5 = Commit("refs/heads/devel",
"A U Thor", "au@thor.email", when_string,
"Com M. Iter", "comm@iter.email", when_string,
"Merge branch 'master'\n",
commit5 = Commit(b"refs/heads/devel",
b"A U Thor", b"au@thor.email", when_string,
b"Com M. Iter", b"comm@iter.email", when_string,
b"Merge branch 'master'\n",
changes,
parents = [commit4.id, commit3.id])
commit5.dump(output)
mytag = Tag("refs/tags/v1.0", commit5.id,
"His R. Highness", "royalty@my.kingdom", when_string,
"I bequeath to my peons this royal software")
mytag = Tag(b"refs/tags/v1.0", commit5.id,
b"His R. Highness", b"royalty@my.kingdom", when_string,
b"I bequeath to my peons this royal software")
mytag.dump(output)
out.finish()

View File

@ -15,14 +15,14 @@ import sys
import git_filter_repo as fr
def drop_file_by_contents(blob):
bad_file_contents = 'The launch code is 1-2-3-4.'
bad_file_contents = b'The launch code is 1-2-3-4.'
if blob.data == bad_file_contents:
blob.skip()
def drop_files_by_name(commit):
new_file_changes = []
for change in commit.file_changes:
if not change.filename.endswith('.doc'):
if not change.filename.endswith(b'.doc'):
new_file_changes.append(change)
commit.file_changes = new_file_changes

View File

@ -14,8 +14,8 @@ not try to handle any such special cases.
import git_filter_repo as fr
def my_commit_callback(commit):
if commit.branch == "refs/heads/master":
commit.branch = "refs/heads/develop"
if commit.branch == b"refs/heads/master":
commit.branch = b"refs/heads/develop"
args = fr.FilteringOptions.default_options()
args.force = True

View File

@ -29,11 +29,11 @@ class InterleaveRepositories:
def hold_commit(self, commit):
commit.skip(new_id = commit.id)
letter = re.match('Commit (.)', commit.message).group(1)
letter = re.match(b'Commit (.)', commit.message).group(1)
self.commit_map[letter] = commit
def weave_commit(self, commit):
letter = re.match('Commit (.)', commit.message).group(1)
letter = re.match(b'Commit (.)', commit.message).group(1)
prev_letter = bytes([ord(letter)-1])
# Splice in any extra commits needed
@ -53,10 +53,10 @@ class InterleaveRepositories:
fr.record_id_rename(new_commit.id, commit.id)
def run(self):
blob = fr.Blob('public gpg key contents')
tag = fr.Tag('gpg-pubkey', blob.id,
'Ima Tagger', 'ima@tagg.er', '1136199845 +0300',
'Very important explanation and stuff')
blob = fr.Blob(b'public gpg key contents')
tag = fr.Tag(b'gpg-pubkey', blob.id,
b'Ima Tagger', b'ima@tagg.er', b'1136199845 +0300',
b'Very important explanation and stuff')
args = fr.FilteringOptions.parse_args(['--target', self.output_dir])
out = fr.RepoFilter(args)

View File

@ -18,8 +18,8 @@ def strip_cvs_keywords(blob):
# FIXME: Should first check if blob is a text file to avoid ruining
# binaries. Could use python.magic here, or just output blob.data to
# the unix 'file' command
pattern = r'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
replacement = r'$\1$'
pattern = br'\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\$'
replacement = br'$\1$'
blob.data = re.sub(pattern, replacement, blob.data)
args = fr.FilteringOptions.parse_args(['--force'])

View File

@ -21,7 +21,7 @@ import textwrap
import git_filter_repo as fr
def handle_progress(progress):
print("Decipher this: "+bytes(reversed(progress.message)))
print(b"Decipher this: "+bytes(reversed(progress.message)))
def handle_checkpoint(checkpoint_object):
# Flip a coin; see if we want to pass the checkpoint through.
@ -44,8 +44,8 @@ def track_everything(obj):
# projects, I'm just verifying an invariant of the current code.
assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]
mystr = 'This is the contents of the blob'
compare = "Blob:\n blob\n mark :1\n data {}\n {}".format(len(mystr), mystr)
mystr = b'This is the contents of the blob'
compare = b"Blob:\n blob\n mark :1\n data %d\n %s" % (len(mystr), mystr)
# Next line's only purpose is testing code coverage of something that helps
# debugging git-filter-repo; it is NOT something external folks should depend
# upon.
@ -102,14 +102,14 @@ stream = io.BytesIO(textwrap.dedent('''
from :3
M 100644 :1 salutation
'''[1:]))
'''[1:]).encode())
counts = collections.Counter()
def look_for_reset(obj):
print("Processing {}".format(obj))
counts[type(obj)] += 1
if type(obj) == fr.Reset:
assert obj.ref == 'refs/heads/B'
assert obj.ref == b'refs/heads/B'
# Use all kinds of internals that external scripts should NOT use and which
# are likely to break in the future, just to verify a few invariants...

View File

@ -51,7 +51,7 @@ test_expect_success '--filename-callback' '
setup filename-callback &&
(
cd filename-callback &&
git filter-repo --filename-callback "return None if filename.endswith(\".doc\") else \"src/\"+filename" &&
git filter-repo --filename-callback "return None if filename.endswith(b\".doc\") else b\"src/\"+filename" &&
git log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&
! grep file.doc f &&
COMPARE=$(wc -l <f) &&
@ -64,7 +64,7 @@ test_expect_success '--message-callback' '
setup message-callback &&
(
cd message-callback &&
git filter-repo --message-callback "return \"TLDR: \"+message[0:5]" &&
git filter-repo --message-callback "return b\"TLDR: \"+message[0:5]" &&
git log --format=%s >log-messages &&
grep TLDR:...... log-messages >modified-messages &&
test_line_count = 6 modified-messages
@ -75,7 +75,7 @@ test_expect_success '--name-callback' '
setup name-callback &&
(
cd name-callback &&
git filter-repo --name-callback "return name.replace(\"N.\", \"And\")" &&
git filter-repo --name-callback "return name.replace(b\"N.\", b\"And\")" &&
git log --format=%an >log-person-names &&
grep Copy.And.Paste log-person-names
)
@ -85,7 +85,7 @@ test_expect_success '--email-callback' '
setup email-callback &&
(
cd email-callback &&
git filter-repo --email-callback "return email.replace(\".com\", \".org\")" &&
git filter-repo --email-callback "return email.replace(b\".com\", b\".org\")" &&
git log --format=%ae%n%ce >log-emails &&
! grep .com log-emails &&
grep .org log-emails
@ -98,7 +98,7 @@ test_expect_success '--refname-callback' '
cd refname-callback &&
git filter-repo --refname-callback "
dir,path = os.path.split(refname)
return dir+\"/prefix-\"+path" &&
return dir+b\"/prefix-\"+path" &&
git show-ref | grep refs/heads/prefix-master &&
git show-ref | grep refs/tags/prefix-v1.0 &&
git show-ref | grep refs/tags/prefix-v2.0
@ -110,7 +110,7 @@ test_expect_success '--refname-callback sanity check' '
(
cd refname-sanity-check &&
test_must_fail git filter-repo --refname-callback "return re.sub(\"tags\", \"other-tags\", refname)" 2>../err &&
test_must_fail git filter-repo --refname-callback "return re.sub(b\"tags\", b\"other-tags\", refname)" 2>../err &&
test_i18ngrep "fast-import requires tags to be in refs/tags/ namespace" ../err &&
rm ../err
)
@ -138,7 +138,7 @@ test_expect_success '--commit-callback' '
commit.committer_email = commit.author_email
commit.committer_date = commit.author_date
for change in commit.file_changes:
change.mode = \"100755\"
change.mode = b\"100755\"
" &&
git log --format=%ae%n%ce >log-emails &&
! grep committer@example.com log-emails &&
@ -153,8 +153,8 @@ test_expect_success '--tag-callback' '
(
cd tag-callback &&
git filter-repo --tag-callback "
tag.tagger_name = \"Dr. \"+tag.tagger_name
tag.message = \"Awesome sauce \"+tag.message
tag.tagger_name = b\"Dr. \"+tag.tagger_name
tag.message = b\"Awesome sauce \"+tag.message
" &&
git cat-file -p v2.0 | grep ^tagger.Dr\\. &&
git cat-file -p v2.0 | grep ^Awesome.sauce.Super
@ -175,7 +175,7 @@ test_expect_success 'callback has return statement sanity check' '
(
cd callback_return_sanity &&
test_must_fail git filter-repo --filename-callback "filename + \".txt\"" 2>../err&&
test_must_fail git filter-repo --filename-callback "filename + b\".txt\"" 2>../err&&
test_i18ngrep "Error: --filename-callback should have a return statement" ../err &&
rm ../err
)