mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-06 18:32:14 +02:00
filter-repo: handle blob callbacks without excessive empty-pruning checks
If we have blob callbacks, we cannot pass --no-data to fast-export. Also, with blob callbacks, any change the callback makes to a file could match the modification made to that file by a subsequent commit, possibly making the later commit empty. As such, we keep a record of all filenames modified (by blob or commit callbacks), and then check these filenames against every subsequent commit to see whether that commit has become empty. In particular, if a non-merge commit modifies any file other than these, we know that the commit will not become empty, so we can bypass the empty-pruning checks. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
dbdb18170b
commit
6fffed6bb1
@ -806,6 +806,10 @@ class FastExportFilter(object):
|
|||||||
# Whether we've run our post-processing extra commands
|
# Whether we've run our post-processing extra commands
|
||||||
self._finalize_handled = False
|
self._finalize_handled = False
|
||||||
|
|
||||||
|
# Names of files that were tweaked in any commit; such paths could lead
|
||||||
|
# to subsequent commits being empty
|
||||||
|
self._files_tweaked = set()
|
||||||
|
|
||||||
def _advance_currentline(self):
|
def _advance_currentline(self):
|
||||||
"""
|
"""
|
||||||
Grab the next line of input
|
Grab the next line of input
|
||||||
@ -1130,10 +1134,11 @@ class FastExportFilter(object):
|
|||||||
if not fast_import_pipes:
|
if not fast_import_pipes:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Perf hack: since we don't support blob rewriting yet, non-merge commits
|
# non-merge commits can only be empty if blob/file-change editing caused
|
||||||
# can only be empty if commit.file_changes is empty, which we checked
|
# all file changes in the commit to have the same file contents as
|
||||||
# above. So return early in such a case.
|
# the parent.
|
||||||
if len(orig_parents) < 2:
|
changed_files = set(change.filename for change in commit.file_changes)
|
||||||
|
if len(orig_parents) < 2 and changed_files - self._files_tweaked:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Finally, the hard case: due to either blob rewriting, or due to pruning
|
# Finally, the hard case: due to either blob rewriting, or due to pruning
|
||||||
@ -1272,6 +1277,9 @@ class FastExportFilter(object):
|
|||||||
# Record ancestry graph
|
# Record ancestry graph
|
||||||
self._graph.add_commit_and_parents(commit.id, commit.get_parents())
|
self._graph.add_commit_and_parents(commit.id, commit.get_parents())
|
||||||
|
|
||||||
|
# Record the original list of file changes relative to first parent
|
||||||
|
orig_file_changes = set(commit.file_changes)
|
||||||
|
|
||||||
# Call any user callback to allow them to modify the commit
|
# Call any user callback to allow them to modify the commit
|
||||||
if self._commit_callback:
|
if self._commit_callback:
|
||||||
self._commit_callback(commit)
|
self._commit_callback(commit)
|
||||||
@ -1282,6 +1290,14 @@ class FastExportFilter(object):
|
|||||||
if commit.merge_commits:
|
if commit.merge_commits:
|
||||||
assert commit.from_commit is not None
|
assert commit.from_commit is not None
|
||||||
|
|
||||||
|
# Find out which files were modified by the callbacks. Such paths could
|
||||||
|
# lead to subsequent commits being empty (e.g. if a callback removed a line containing
|
||||||
|
# a password from every version of a file that had the password, and some
|
||||||
|
# later commit did nothing more than remove that line)
|
||||||
|
final_file_changes = set(commit.file_changes)
|
||||||
|
differences = orig_file_changes.symmetric_difference(final_file_changes)
|
||||||
|
self._files_tweaked.update(x.filename for x in differences)
|
||||||
|
|
||||||
# Now print the resulting commit, or if prunable skip it
|
# Now print the resulting commit, or if prunable skip it
|
||||||
if not commit.dumped:
|
if not commit.dumped:
|
||||||
if not self.prunable(commit, new_1st_parent, had_file_changes,
|
if not self.prunable(commit, new_1st_parent, had_file_changes,
|
||||||
@ -2555,12 +2571,13 @@ class RepoFilter(object):
|
|||||||
input = sys.stdin
|
input = sys.stdin
|
||||||
fe_orig = None
|
fe_orig = None
|
||||||
else:
|
else:
|
||||||
|
skip_blobs = blob_callback is None and everything_callback is None
|
||||||
|
extra_flags = ['--no-data'] if skip_blobs else []
|
||||||
fep_cmd = ['git', 'fast-export',
|
fep_cmd = ['git', 'fast-export',
|
||||||
'--show-original-ids',
|
'--show-original-ids',
|
||||||
'--signed-tags=strip',
|
'--signed-tags=strip',
|
||||||
'--tag-of-filtered-object=rewrite',
|
'--tag-of-filtered-object=rewrite',
|
||||||
'--no-data',
|
'--use-done-feature'] + extra_flags + args.refs
|
||||||
'--use-done-feature'] + args.refs
|
|
||||||
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
fep = subprocess.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)
|
||||||
input = fep.stdout
|
input = fep.stdout
|
||||||
if args.dry_run or args.debug:
|
if args.dry_run or args.debug:
|
||||||
|
Loading…
Reference in New Issue
Block a user