From 4f84a74adad3a37f5f67075a95939496a9e67d87 Mon Sep 17 00:00:00 2001
From: Elijah Newren
Date: Mon, 23 Mar 2020 10:40:15 -0700
Subject: [PATCH] filter-repo: use more expensive prunability checks when needed

When users are inserting new objects into the stream, we cannot make as
many assumptions and need to do more careful checks for whether commits
become empty or not.

Signed-off-by: Elijah Newren
---
 git-filter-repo                  | 22 ++++++++++++++++------
 t/t9391-filter-repo-lib-usage.sh | 24 ++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/git-filter-repo b/git-filter-repo
index 9fb19b3..6190a77 100755
--- a/git-filter-repo
+++ b/git-filter-repo
@@ -372,6 +372,12 @@ class _IDs(object):
     # A map of new-ids to every old-id that points to the new-id (1:N map)
     self._reverse_translation = {}
 
+  def has_renames(self):
+    """
+    Return whether there have been ids remapped to new values
+    """
+    return bool(self._translation)
+
   def new(self):
     """
     Should be called whenever a new blob or commit object is created. The
@@ -3121,12 +3127,16 @@ class RepoFilter(object):
     if not self._import_pipes:
       return False
 
-    # non-merge commits can only be empty if blob/file-change editing caused
-    # all file changes in the commit to have the same file contents as
-    # the parent.
-    changed_files = set(change.filename for change in commit.file_changes)
-    if len(orig_parents) < 2 and changed_files - self._files_tweaked:
-      return False
+    # If there have not been renames/remappings of IDs (due to insertion of
+    # new blobs), then we can sometimes know things aren't prunable with a
+    # simple check
+    if not _IDS.has_renames():
+      # non-merge commits can only be empty if blob/file-change editing caused
+      # all file changes in the commit to have the same file contents as
+      # the parent.
+      changed_files = set(change.filename for change in commit.file_changes)
+      if len(orig_parents) < 2 and changed_files - self._files_tweaked:
+        return False
 
     # Finally, the hard case: due to either blob rewriting, or due to pruning
     # of empty commits wiping out the first parent history back to the merge
diff --git a/t/t9391-filter-repo-lib-usage.sh b/t/t9391-filter-repo-lib-usage.sh
index ef9026d..f2ac101 100755
--- a/t/t9391-filter-repo-lib-usage.sh
+++ b/t/t9391-filter-repo-lib-usage.sh
@@ -7,6 +7,7 @@ test_description='Usage of git-filter-repo as a library'
 export PYTHONPATH=$(dirname $TEST_DIRECTORY):$PYTHONPATH
 # Avoid writing git_filter_repo.pyc file
 export PYTHONDONTWRITEBYTECODE=1
+export CONTRIB_DIR=$TEST_DIRECTORY/../contrib/filter-repo-demos
 
 setup()
 {
@@ -163,4 +164,27 @@ test_expect_success 'other error cases' '
 	)
 '
 
+test_expect_success 'lint-history' '
+	test_create_repo lint-history &&
+	(
+		cd lint-history &&
+		echo initial >content &&
+		git add content &&
+		git commit -m "initial" &&
+
+		printf "CRLF is stupid\r\n" >content &&
+		git add content &&
+		git commit -m "make a statement" &&
+
+		printf "CRLF is stupid\n" >content &&
+		git add content &&
+		git commit -m "oops, that was embarassing" &&
+
+		$CONTRIB_DIR/lint-history --filenames-important dos2unix &&
+		echo 2 >expect &&
+		git rev-list --count HEAD >actual &&
+		test_cmp expect actual
+	)
+'
+
 test_done