filter-repo: implement --strip-blobs-with-ids

Add a flag that takes the name of a file listing blob ids; every blob
whose id appears in that file is stripped from the repository.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-05-30 19:42:57 -07:00
parent d958b0345c
commit b6a35f8dcd
2 changed files with 59 additions and 0 deletions

View File

@ -1639,6 +1639,8 @@ EXAMPLES
dest='max_blob_size', default=0,
help=_("Strip blobs (files) bigger than specified size (e.g. '5M', "
"'2G', etc)"))
contents.add_argument('--strip-blobs-with-ids', metavar='BLOB-ID-FILENAME',
help=_("Strip blob with the specified git object ids (hashes)"))
refrename = parser.add_argument_group(title=_("Renaming of refs "
"(see also --refname-callback)"))
@ -1926,6 +1928,11 @@ EXAMPLES
args.mailmap = MailmapInfo(args.mailmap)
if args.replace_text:
args.replace_text = FilteringOptions.get_replace_text(args.replace_text)
if args.strip_blobs_with_ids:
with open(args.strip_blobs_with_ids, 'br') as f:
args.strip_blobs_with_ids = set(f.read().split())
else:
args.strip_blobs_with_ids = set()
return args
class RepoAnalyze(object):
@ -2919,6 +2926,9 @@ class RepoFilter(object):
if self._args.max_blob_size and len(blob.data) > self._args.max_blob_size:
blob.skip()
if blob.original_id in self._args.strip_blobs_with_ids:
blob.skip()
if self._args.replace_text:
for literal, replacement in self._args.replace_text['literals']:
blob.data = blob.data.replace(literal, replacement)
@ -3051,6 +3061,10 @@ class RepoFilter(object):
if self._args.max_blob_size and \
self._unpacked_size.get(change.blob_id, 0) > self._args.max_blob_size:
continue
if self._args.strip_blobs_with_ids and \
change.blob_id in self._args.strip_blobs_with_ids:
continue
# Otherwise, record the change
new_file_changes[change.filename] = change
commit.file_changes = [v for k,v in sorted(new_file_changes.items())]

View File

@ -710,6 +710,51 @@ test_expect_success '--strip-blobs-bigger-than' '
)
'
# Exercise --strip-blobs-with-ids twice: first with the id list supplied via
# process substitution, then with an id file combined with --replace-text.
test_expect_success '--strip-blobs-with-ids' '
(
git clone file://"$(pwd)"/analyze_me strip_blobs_with_ids &&
cd strip_blobs_with_ids &&
# Verify certain files are present initially
git log --format=%n --name-only | sort | uniq >../filenames &&
test_line_count = 11 ../filenames &&
grep fake_submodule ../filenames &&
# Strip the single listed object id; the checks below expect this to
# drop exactly one name, fake_submodule, from the history
git filter-repo --strip-blobs-with-ids <(echo deadbeefdeadbeefdeadbeefdeadbeefdeadbeef) &&
git log --format=%n --name-only | sort | uniq >../filenames &&
test_line_count = 10 ../filenames &&
# Make sure fake_submodule was removed
! grep fake_submodule ../filenames &&
# Do it again, this time with --replace-text since that means
# we are operating without --no-data and have to go through
# a different codepath. (The search/replace terms are bogus)
cat >../bad-ids <<-\EOF &&
34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373
51b95456de9274c9a95f756742808dfd480b9b35
EOF
cat >../replace-rules <<-\EOF &&
not found==>was found
EOF
git filter-repo --strip-blobs-with-ids ../bad-ids --replace-text ../replace-rules &&
git log --format=%n --name-only | sort | uniq >../filenames &&
test_line_count = 5 ../filenames &&
! grep sequence/to ../filenames &&
! grep words/to ../filenames &&
! grep capricious ../filenames &&
! grep fickle ../filenames &&
# Keep the && chain unbroken here so that a failed check above
# fails the whole test instead of being masked by the cleanup
! grep mercurial ../filenames &&
# Remove the temporary auxiliary files
rm ../bad-ids &&
rm ../replace-rules &&
rm ../filenames
)
'
test_expect_success 'setup commit message rewriting' '
test_create_repo commit_msg &&
(