mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-05 09:52:15 +02:00
filter-repo: implement --strip-blobs-bigger-than
Add a flag for filtering out blobs based on their size, and allow the size to be specified using 'K', 'M', or 'G' suffixes. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
598661dcf4
commit
587f727d19
@ -1587,6 +1587,10 @@ class FilteringOptions(object):
|
||||
"end the line with '==>' and some replacement text to "
|
||||
"choose a replacement choice other than the default of "
|
||||
"'***REMOVED***'. "))
|
||||
contents.add_argument('--strip-blobs-bigger-than', metavar='SIZE',
|
||||
dest='max_blob_size', default=0,
|
||||
help=_("Strip blobs (files) bigger than specified size (e.g. '5M', "
|
||||
"'2G', etc)"))
|
||||
|
||||
refrename = parser.add_argument_group(title=_("Renaming of refs "
|
||||
"(see also --refname-callback)"))
|
||||
@ -1764,6 +1768,17 @@ class FilteringOptions(object):
|
||||
raise SystemExit(_("Error: need a version of git whose diff-tree "
|
||||
"command has the --combined-all-paths option"))
|
||||
# End of sanity checks on git version
|
||||
if args.max_blob_size:
|
||||
suffix = args.max_blob_size[-1]
|
||||
if suffix not in '1234567890':
|
||||
mult = {'K': 1024, 'M': 1024**2, 'G': 1024**3}
|
||||
if suffix not in mult:
|
||||
raise SystemExit(_("Error: could not parse --strip-blobs-bigger-than"
|
||||
" argument %s")
|
||||
% args.max_blob_size)
|
||||
args.max_blob_size = int(args.max_blob_size[0:-1]) * mult[suffix]
|
||||
else:
|
||||
args.max_blob_size = int(args.max_blob_size)
|
||||
|
||||
@staticmethod
|
||||
def get_replace_text(filename):
|
||||
@ -2450,6 +2465,9 @@ class RepoFilter(object):
|
||||
self._progress_writer = ProgressWriter()
|
||||
self._num_commits = 0
|
||||
|
||||
# Size of blobs in the repo
|
||||
self._unpacked_size = {}
|
||||
|
||||
# Other vars
|
||||
self._sanity_checks_handled = False
|
||||
self._finalize_handled = False
|
||||
@ -2845,6 +2863,9 @@ class RepoFilter(object):
|
||||
**extra_items}
|
||||
|
||||
def _tweak_blob(self, blob):
|
||||
if self._args.max_blob_size and len(blob.data) > self._args.max_blob_size:
|
||||
blob.skip()
|
||||
|
||||
if self._args.replace_text:
|
||||
for literal, replacement in self._args.replace_text['literals']:
|
||||
blob.data = blob.data.replace(literal, replacement)
|
||||
@ -2973,6 +2994,10 @@ class RepoFilter(object):
|
||||
raise SystemExit(_("File renaming caused colliding pathnames!\n") +
|
||||
_(" Commit: {}\n").format(commit.original_id) +
|
||||
_(" Filename: {}").format(change.filename))
|
||||
# Strip files that are too large
|
||||
if self._args.max_blob_size and \
|
||||
self._unpacked_size.get(change.blob_id, 0) > self._args.max_blob_size:
|
||||
continue
|
||||
new_file_changes[change.filename] = change
|
||||
commit.file_changes = [v for k,v in sorted(new_file_changes.items())]
|
||||
|
||||
@ -3119,6 +3144,8 @@ class RepoFilter(object):
|
||||
extra_flags = []
|
||||
if skip_blobs:
|
||||
extra_flags.append('--no-data')
|
||||
if self._args.max_blob_size:
|
||||
self._unpacked_size, packed_size = GitUtils.get_blob_sizes()
|
||||
if use_done_feature:
|
||||
extra_flags.append('--use-done-feature')
|
||||
if self._args.preserve_commit_encoding is not None: # pragma: no cover
|
||||
|
@ -655,6 +655,61 @@ test_expect_success '--replace-text all options' '
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success '--strip-blobs-bigger-than' '
|
||||
(
|
||||
git clone file://"$(pwd)"/analyze_me strip_big_blobs &&
|
||||
cd strip_big_blobs &&
|
||||
|
||||
# Verify certain files are present initially
|
||||
git log --format=%n --name-only | sort | uniq >../filenames &&
|
||||
test_line_count = 11 ../filenames &&
|
||||
git rev-parse HEAD~7:numbers/medium.num &&
|
||||
git rev-parse HEAD~7:numbers/small.num &&
|
||||
git rev-parse HEAD~4:mercurial &&
|
||||
test -f mercurial &&
|
||||
|
||||
# Make one of the current files be "really big"
|
||||
test_seq 1 1000 >mercurial &&
|
||||
git add mercurial &&
|
||||
git commit --amend &&
|
||||
|
||||
# Strip "really big" files
|
||||
git filter-repo --force --strip-blobs-bigger-than 3K --prune-empty never &&
|
||||
|
||||
git log --format=%n --name-only | sort | uniq >../filenames &&
|
||||
test_line_count = 11 ../filenames &&
|
||||
# The "mercurial" file should still be around...
|
||||
git rev-parse HEAD~4:mercurial &&
|
||||
git rev-parse HEAD:mercurial &&
|
||||
# ...but only with its old, smaller contents
|
||||
test_line_count = 1 mercurial &&
|
||||
|
||||
# Strip files that are too big, verify they are gone
|
||||
git filter-repo --strip-blobs-bigger-than 40 &&
|
||||
|
||||
git log --format=%n --name-only | sort | uniq >../filenames &&
|
||||
test_line_count = 10 ../filenames &&
|
||||
test_must_fail git rev-parse HEAD~7:numbers/medium.num &&
|
||||
|
||||
# Do it again, this time with --replace-text since that means
|
||||
# we are operating without --no-data and have to go through
|
||||
# a different codepath. (The search/replace terms are bogus)
|
||||
cat >../replace-rules <<-\EOF &&
|
||||
not found==>was found
|
||||
EOF
|
||||
git filter-repo --strip-blobs-bigger-than 20 --replace-text ../replace-rules &&
|
||||
|
||||
git log --format=%n --name-only | sort | uniq >../filenames &&
|
||||
test_line_count = 9 ../filenames &&
|
||||
test_must_fail git rev-parse HEAD~7:numbers/medium.num &&
|
||||
test_must_fail git rev-parse HEAD~7:numbers/small.num &&
|
||||
|
||||
# Remove the temporary auxiliary files
|
||||
rm ../replace-rules &&
|
||||
rm ../filenames
|
||||
)
|
||||
'
|
||||
|
||||
test_expect_success 'setup commit message rewriting' '
|
||||
test_create_repo commit_msg &&
|
||||
(
|
||||
@ -897,7 +952,11 @@ test_expect_success 'other startup error cases and requests for help' '
|
||||
test_i18ngrep "either ends with a slash then both must." err &&
|
||||
|
||||
test_must_fail git filter-repo --paths-from-file <(echo "glob:*.py==>newname") 2>err &&
|
||||
test_i18ngrep "renaming globs makes no sense" err
|
||||
test_i18ngrep "renaming globs makes no sense" err &&
|
||||
|
||||
test_must_fail git filter-repo --strip-blobs-bigger-than 3GiB 2>err &&
|
||||
test_i18ngrep "could not parse.*3GiB" err
|
||||
|
||||
)
|
||||
'
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user