filter-repo: record suboptimality notes about changing merges to non-merges

When the pruning of empty commits culls parents of a merge commit so
that it drops to just one parent, the commit likely becomes misleading:
it is no longer a merge commit, but its message probably still implies
that it is (e.g. "Merge branch maint into master").  There's nothing we
can do to automatically fix this, but we can note it as a suboptimal
issue in the filtering process.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2018-10-17 18:31:26 -07:00
parent 9e02ac95e4
commit e3fde7689c

View File

@ -18,6 +18,7 @@ import os
import re import re
import subprocess import subprocess
import sys import sys
import textwrap
from email.Utils import unquote from email.Utils import unquote
from datetime import tzinfo, timedelta, datetime from datetime import tzinfo, timedelta, datetime
@ -647,6 +648,12 @@ class FastExportFilter(object):
# of its ancestors. # of its ancestors.
self._graph = AncestryGraph() self._graph = AncestryGraph()
# A list of commit hash pairs (oldhash, newhash) which used to be merge
# commits but due to filtering were turned into non-merge commits.
# The commits probably have suboptimal commit messages (e.g. "Merge branch
# next into master").
self._commits_no_longer_merges = []
# A handle to the input source for the fast-export data # A handle to the input source for the fast-export data
self._input = None self._input = None
@ -984,6 +991,17 @@ class FastExportFilter(object):
if not commit.dumped: if not commit.dumped:
if merge_commit or not had_file_changes or commit.file_changes: if merge_commit or not had_file_changes or commit.file_changes:
commit.dump(self._output) commit.dump(self._output)
new_id = None
# Determine the mapping of old commit hash to new one
if commit.original_id and fast_import_pipes:
fi_input, fi_output = fast_import_pipes
fi_input.write("get-mark :{}\n".format(commit.id))
orig_id = commit.original_id
new_id = fi_output.readline().rstrip()
# Now, record if this was a merge commit that turned into a non-merge
# commit.
if num_original_parents > 1 and not merge_commit:
self._commits_no_longer_merges.append((orig_id, new_id))
else: else:
# We skip empty commits, but want to keep track to make sure we don't # We skip empty commits, but want to keep track to make sure we don't
# lose any refs this way. # lose any refs this way.
@ -1134,6 +1152,24 @@ class FastExportFilter(object):
batch_check_process.stdin.close() batch_check_process.stdin.close()
batch_check_process.wait() batch_check_process.wait()
with open(os.path.join(metadata_dir, 'suboptimal-issues'), 'w') as f:
issues_found = False
if self._commits_no_longer_merges:
issues_found = True
f.write(textwrap.dedent('''
The following commits used to be merge commits but due to filtering
are now regular commits; they likely have suboptimal commit messages
(e.g. "Merge branch next into master"). Original commit hash on the
left, commit hash after filtering/rewriting on the right:
'''[1:]))
for oldhash, newhash in self._commits_no_longer_merges:
f.write(' {} {}\n'.format(oldhash, newhash))
f.write('\n')
if not issues_found:
f.write("No filtering problems encountered.")
def get_seen_refs(self): def get_seen_refs(self):
return self._seen_refs.keys() return self._seen_refs.keys()