mirror of
https://github.com/newren/git-filter-repo.git
synced 2024-07-04 01:15:41 +02:00
filter-repo: add automatic rewriting of commit hashes in commit messages
Commit messages often refer to past commits; while rewriting commits we would also like to update these commit messages to refer to the new commit names. In the case that a commit message references another commit which was dropped by the filtering process, we have no way to rewrite the commit message to reference a valid commit hash. Instead of dying, note the suboptimal commit in the suboptimal-issues file. Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
parent
f95308c5eb
commit
af081d0fce
|
@ -665,6 +665,26 @@ class FastExportFilter(object):
|
|||
# next into master").
|
||||
self._commits_no_longer_merges = []
|
||||
|
||||
# A dict of original_ids to new_ids; filtering commits means getting
|
||||
# new commit hashes (sha1sums), and we record the mapping both for
|
||||
# diagnostic purposes and so we can rewrite commit messages. Note that
|
||||
# the new_id can be None rather than a commit hash if the original
|
||||
# commit became empty and was pruned or was otherwise dropped.
|
||||
self._commit_renames = {}
|
||||
|
||||
# A dict of commit_hash[0:7] -> set(commit_hashes with that prefix).
|
||||
#
|
||||
# It's common for commit messages to refer to commits by abbreviated
|
||||
# commit hashes, as short as 7 characters. To facilitate translating
|
||||
# such short hashes, we have a mapping of prefixes to full old hashes.
|
||||
self._commit_short_old_hashes = collections.defaultdict(set)
|
||||
|
||||
# A set of commit hash references appearing in commit messages which
|
||||
# mapped to a valid commit that was removed entirely in the filtering
|
||||
# process. The commit message will continue to reference the
|
||||
# now-missing commit hash, since there was nothing to map it to.
|
||||
self._commits_referenced_but_removed = set()
|
||||
|
||||
# A handle to the input source for the fast-export data
|
||||
self._input = None
|
||||
|
||||
|
@ -867,6 +887,26 @@ class FastExportFilter(object):
|
|||
if not reset.dumped:
|
||||
reset.dump(self._output)
|
||||
|
||||
def _translate_commit_hash(self, matchobj):
    """
    Return the replacement text for one hash-like match in a commit message.

    Used as the replacement callback for re.sub(): matchobj.group(1) is a
    full or abbreviated hexadecimal string that may name an old commit.

    The returned string always has the same length as the matched text:

      * If the text identifies exactly one commit recorded in
        self._commit_renames and that commit has a new hash, the new hash
        truncated to the matched length is returned.
      * If the text matches no recorded commit, or is an abbreviation
        shared by more than one recorded commit, it is returned unchanged.
      * If the text identifies a commit whose recorded new hash is None,
        the full old hash is added to self._commits_referenced_but_removed
        and the matched text is returned unchanged.
    """
    old_hash = matchobj.group(1)
    orig_len = len(old_hash)

    if old_hash not in self._commit_renames:
        # Not a known full hash; try to expand it as an abbreviation.  Full
        # hashes are indexed by their first seven characters.  Membership is
        # tested before indexing so that a miss does not insert an empty
        # set when the index is a defaultdict.
        prefix = old_hash[0:7]
        if prefix not in self._commit_short_old_hashes:
            return old_hash
        matches = [full_hash
                   for full_hash in self._commit_short_old_hashes[prefix]
                   if full_hash[0:orig_len] == old_hash]
        if len(matches) != 1:
            # No candidate, or an ambiguous abbreviation: leave text as-is.
            return old_hash
        old_hash = matches[0]

    # NOTE(review): the visible code that records a None mapping for a
    # skipped commit does not appear to add that commit to
    # self._commit_short_old_hashes, so abbreviated references to such
    # commits would be returned above without being recorded -- confirm.
    new_hash = self._commit_renames[old_hash]
    if new_hash is None:
        # The referenced commit no longer exists; remember it for the
        # report and keep the reference exactly as it was written.
        self._commits_referenced_but_removed.add(old_hash)
        return old_hash[0:orig_len]
    return new_hash[0:orig_len]
|
||||
|
||||
def _parse_commit(self, fast_import_pipes):
|
||||
"""
|
||||
Parse input data into a Commit object. Once the Commit has been created,
|
||||
|
@ -897,6 +937,9 @@ class FastExportFilter(object):
|
|||
(committer_name, committer_email, committer_date)
|
||||
|
||||
commit_msg = self._parse_data()
|
||||
commit_msg = re.sub(r'(\b[0-9a-f]{7,40}\b)',
|
||||
self._translate_commit_hash,
|
||||
commit_msg)
|
||||
|
||||
parents = []
|
||||
parents.append(self._parse_optional_parent_ref('from'))
|
||||
|
@ -1019,12 +1062,14 @@ class FastExportFilter(object):
|
|||
(not had_file_changes and len(parents) >= 1)):
|
||||
commit.dump(self._output)
|
||||
new_id = None
|
||||
# Determine the mapping of old commit hash to new one
|
||||
# Record the mapping of old commit hash to new one
|
||||
if commit.original_id and fast_import_pipes:
|
||||
fi_input, fi_output = fast_import_pipes
|
||||
fi_input.write("get-mark :{}\n".format(commit.id))
|
||||
orig_id = commit.original_id
|
||||
new_id = fi_output.readline().rstrip()
|
||||
self._commit_renames[orig_id] = new_id
|
||||
self._commit_short_old_hashes[orig_id[0:7]].add(orig_id)
|
||||
# Now, record if this was a merge commit that turned into a non-merge
|
||||
# commit.
|
||||
if num_original_parents > 1 and not merge_commit:
|
||||
|
@ -1034,6 +1079,7 @@ class FastExportFilter(object):
|
|||
# lose any refs this way.
|
||||
self._seen_refs[branch] = commit.first_parent()
|
||||
commit.skip(commit.first_parent())
|
||||
self._commit_renames[commit.original_id] = None
|
||||
|
||||
def _parse_tag(self):
|
||||
"""
|
||||
|
@ -1201,6 +1247,17 @@ class FastExportFilter(object):
|
|||
f.write(' {} {}\n'.format(oldhash, newhash))
|
||||
f.write('\n')
|
||||
|
||||
if self._commits_referenced_but_removed:
|
||||
issues_found = True
|
||||
f.write(textwrap.dedent('''
|
||||
The following commits were filtered out, but referenced in another
|
||||
commit message. The reference to the now-nonexistent commit hash
|
||||
(or a substring thereof) was left as-is in any commit messages:
|
||||
'''[1:]))
|
||||
for bad_commit_reference in self._commits_referenced_but_removed:
|
||||
f.write(' {}\n'.format(bad_commit_reference))
|
||||
f.write('\n')
|
||||
|
||||
if not issues_found:
|
||||
f.write("No filtering problems encountered.")
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user