filter-repo: fix detection of merge becoming empty commit

In the previous commit, we detected when an entire line of history back to
a common ancestor of the merge became empty commits, and avoided having a
commit be merged with itself.  This commits looks through the changes
specified in the commit, which are always specified relative to the first
parent, so that if the first parent side was the empty one we can still
detect if the merge commit adds no extra changes relative to its remaining
parent.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2018-09-04 17:43:06 -07:00
parent 70505e00f9
commit d13f7e9178

View File

@ -831,7 +831,7 @@ class FastExportFilter(object):
if not reset.dumped:
reset.dump(self._output)
def _parse_commit(self):
def _parse_commit(self, fast_import_pipes):
"""
Parse input data into a Commit object. Once the Commit has been created,
it will be handed off to the appropriate callbacks. Current-line will
@ -875,6 +875,7 @@ class FastExportFilter(object):
# merge a commit with its ancestor. Remove parents that are an
# ancestor of another parent.)
num_original_parents = len(parents)
check_merge_now_empty = False
if num_original_parents > 1:
to_remove = []
for cur in xrange(num_original_parents):
@ -884,6 +885,8 @@ class FastExportFilter(object):
to_remove.append(cur)
for x in reversed(to_remove):
parents.pop(x)
if len(parents) == 1:
check_merge_now_empty = True
# Record our new parents after above pruning of parents representing
# pruned empty histories
@ -900,6 +903,48 @@ class FastExportFilter(object):
if self._currentline == '\n':
self._advance_currentline()
# If we had a merge commit and the first parent history back to the
# merge base was entirely composed of commits made empty by our
# filtering, it is likely that this merge commit is empty and can be
# pruned too. Check by comparing the contents of this merge to its
# remaining parent.
#
# NOTES on why/how this works:
# 1. fast-export always gives file changes in a merge commit relative
# to the first parent.
# 2. The only way this 'if' is active is when the first parent was
# an ancestor of what is now the only remaining parent
# 3. The two above imply that the file changes we're looking at are
# just for the line of history for the remaining parent, and show
# all changes needed to make the original first parent (whose tree
# matched an ancestor of the remaining parent) match the merge's tree.
# 4. If the versions of all specified files in the remaining parent
# match the file change versions, then this "merge" commit is
# actually going to be an empty non-merge commit and we should prune
# it.
if check_merge_now_empty and fast_import_pipes:
unnecessary_filechanges = set()
fi_input, fi_output = fast_import_pipes
# Optimization note: we could have two loops over file_changes, the
# first doing all the fi_input.write() calls, and the second doing the
# rest. But I'm worried about fast-import blocking on fi_output
# buffers filling up so I instead read from it as I go.
for change in file_changes:
fi_input.write("ls :{} {}\n".format(from_commit, change.filename))
parent_version = fi_output.readline().split()
if change.type == 'D':
if parent_version == ['missing', change.filename]:
unnecessary_filechanges.add(change)
else:
blob_sha = change.blob_id
if isinstance(change.blob_id, int):
fi_input.write("get-mark :{}\n".format(change.blob_id))
blob_sha = fi_output.readline().rstrip()
if parent_version == [change.mode, 'blob', blob_sha, change.filename]:
unnecessary_filechanges.add(change)
file_changes = [change for change in file_changes
if change not in unnecessary_filechanges]
# Okay, now we can finally create the Commit object
commit = Commit(branch,
author_name, author_email, author_date,
@ -1051,7 +1096,7 @@ class FastExportFilter(object):
def get_seen_refs(self):
return self._seen_refs.keys()
def run(self, *args):
def run(self, *args, **kwargs):
"""
This method performs the filter. The method optionally takes two arguments.
The first represents the source repository (either a file object
@ -1110,7 +1155,7 @@ class FastExportFilter(object):
elif self._currentline.startswith('reset'):
self._parse_reset()
elif self._currentline.startswith('commit'):
self._parse_commit()
self._parse_commit(kwargs.get('fast_import_pipes', None))
elif self._currentline.startswith('tag'):
self._parse_tag()
elif self._currentline.startswith('progress'):
@ -1473,9 +1518,12 @@ def run_fast_filter():
print(" (saving a copy of the output at {})".format(fe_orig))
# Determine where to send output
pipes = None
if not args.dry_run:
fip_cmd = 'git fast-import --force --quiet'.split()
fip = subprocess.Popen(fip_cmd, stdin=subprocess.PIPE)
fip = subprocess.Popen(fip_cmd, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
pipes = (fip.stdin, fip.stdout)
if args.dry_run or args.debug:
fe_filt = os.path.join(results_tmp_dir, 'fast-export.filtered')
output = open(fe_filt, 'w')
@ -1490,7 +1538,7 @@ def run_fast_filter():
filter = FastExportFilter(
commit_callback = lambda c : tweak_commit(args, c),
)
filter.run(input, output)
filter.run(input, output, fast_import_pipes = pipes)
# Close the output, ensure fast-export and fast-import have completed
output.close()