filter-repo: simplify API for parent handling in Commit object

While the underlying fast-export and fast-import streams explicitly
separate 'from' commit (first parent) and 'merge' commits (all other
parents), foisting that separation into the Commit object for
filter-repo forces additional places in the code to deal with that
distinction.  It results in less clear code, and especially does not
make sense to push upon folks who may want to use filter-repo as a
library.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-04-26 07:28:16 -06:00
parent bec6bd8d3c
commit 805ce360fa
3 changed files with 32 additions and 64 deletions

View File

@ -569,8 +569,7 @@ class Commit(_GitElementWithId):
committer_name, committer_email, committer_date, committer_name, committer_email, committer_date,
message, message,
file_changes, file_changes,
from_commit = None, parents,
merge_commits = [],
original_id = None, original_id = None,
**kwargs): **kwargs):
_GitElementWithId.__init__(self) _GitElementWithId.__init__(self)
@ -609,12 +608,7 @@ class Commit(_GitElementWithId):
# are also represented as git elements # are also represented as git elements
self.file_changes = file_changes self.file_changes = file_changes
# Record the commit to initialize this branch from. This revision will be self.parents = parents
# the first parent of the new commit
self.from_commit = from_commit
# Record additional parent commits
self.merge_commits = merge_commits
def dump(self, file_): def dump(self, file_):
""" """
@ -625,7 +619,7 @@ class Commit(_GitElementWithId):
# Make output to fast-import slightly easier for humans to read if the # Make output to fast-import slightly easier for humans to read if the
# message has no trailing newline of its own; cosmetic, but a nice touch... # message has no trailing newline of its own; cosmetic, but a nice touch...
extra_newline = '\n' extra_newline = '\n'
if self.message.endswith('\n') or not (self.from_commit or self.file_changes): if self.message.endswith('\n') or not (self.parents or self.file_changes):
extra_newline = '' extra_newline = ''
file_.write(('commit {}\n' file_.write(('commit {}\n'
@ -640,37 +634,24 @@ class Commit(_GitElementWithId):
len(self.message), self.message, len(self.message), self.message,
extra_newline) extra_newline)
) )
if self.from_commit: for i, parent in enumerate(self.parents):
mark = ':' if isinstance(self.from_commit, int) else '' mark = ':' if isinstance(parent, int) else ''
file_.write('from {}{}\n'.format(mark, self.from_commit)) file_.write('from ' if i==0 else 'merge ')
for ref in self.merge_commits: file_.write('{}{}\n'.format(mark, parent))
mark = ':' if isinstance(ref, int) else ''
file_.write('merge {}{}\n'.format(mark, ref))
for change in self.file_changes: for change in self.file_changes:
change.dump(file_) change.dump(file_)
if not self.from_commit and not self.file_changes: if not self.parents and not self.file_changes:
# Workaround a bug in pre-git-2.22 versions of fast-import with # Workaround a bug in pre-git-2.22 versions of fast-import with
# the get-mark directive. # the get-mark directive.
file_.write('\n') file_.write('\n')
file_.write('\n') file_.write('\n')
def get_parents(self):
"""
Return all parent commits
"""
my_parents = []
if self.from_commit:
my_parents.append(self.from_commit)
my_parents += self.merge_commits
return my_parents
def first_parent(self): def first_parent(self):
""" """
Return first parent commit Return first parent commit
""" """
my_parents = self.get_parents() if self.parents:
if my_parents: return self.parents[0]
return my_parents[0]
return None return None
def skip(self, new_id=None): def skip(self, new_id=None):
@ -1209,11 +1190,8 @@ class FastExportFilter(object):
[x in _SKIPPED_COMMITS for x in orig_parents]) [x in _SKIPPED_COMMITS for x in orig_parents])
tmp2 = [x for x in tmp if x[0] is not None] tmp2 = [x for x in tmp if x[0] is not None]
if not tmp2: if not tmp2:
# All ancestors have been pruned; we have no parents. Note that the # All ancestors have been pruned; we have no parents.
# way fast-export/fast-import split parents into from_commit and return [], None
# merge_commits means we'd rather a parentless commit be represented
# as a list containing a single None entry.
return [None], None
parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)] parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)]
# We can't have redundant parents if we don't have at least 2 parents # We can't have redundant parents if we don't have at least 2 parents
@ -1268,9 +1246,7 @@ class FastExportFilter(object):
return parents, None return parents, None
def prunable(self, commit, new_1st_parent, had_file_changes, orig_parents): def prunable(self, commit, new_1st_parent, had_file_changes, orig_parents):
parents = [commit.from_commit] + commit.merge_commits parents = commit.parents
if not commit.from_commit:
parents = []
# For merge commits, unless there are prunable (redundant) parents, we # For merge commits, unless there are prunable (redundant) parents, we
# do not want to prune # do not want to prune
@ -1334,8 +1310,7 @@ class FastExportFilter(object):
# the rest. But I'm worried about fast-import blocking on fi_output # the rest. But I'm worried about fast-import blocking on fi_output
# buffers filling up so I instead read from it as I go. # buffers filling up so I instead read from it as I go.
for change in commit.file_changes: for change in commit.file_changes:
parent = new_1st_parent or commit.from_commit parent = new_1st_parent or commit.parents[0] # exists due to above checks
assert parent # Should be good based on the checks above
self._output.write("ls :{} {}\n".format(parent, change.filename)) self._output.write("ls :{} {}\n".format(parent, change.filename))
self._output.flush() self._output.flush()
parent_version = fi_output.readline().split() parent_version = fi_output.readline().split()
@ -1368,7 +1343,7 @@ class FastExportFilter(object):
self._flush_renames(None, limit=40) self._flush_renames(None, limit=40)
# Also, record if this was a merge commit that turned into a non-merge # Also, record if this was a merge commit that turned into a non-merge
# commit. # commit.
if len(orig_parents) >= 2 and not commit.merge_commits: if len(orig_parents) >= 2 and len(commit.parents) < 2:
self._commits_no_longer_merges.append((commit.original_id, new_id)) self._commits_no_longer_merges.append((commit.original_id, new_id))
def num_commits_parsed(self): def num_commits_parsed(self):
@ -1413,13 +1388,16 @@ class FastExportFilter(object):
pinfo.append(self._parse_optional_parent_ref('merge')) pinfo.append(self._parse_optional_parent_ref('merge'))
orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)] orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)]
# Prune parents (due to pruning of empty commits) if relevant # No parents is oddly represented as [None] instead of [], due to the
parents, new_1st_parent = self.trim_extra_parents(orig_parents, parents) # special 'from' handling. Convert it here to a more canonical form.
from_commit = parents[0] if parents == [None]:
merge_commits = parents[1:] parents = []
if orig_parents == [None]: if orig_parents == [None]:
orig_parents = [] orig_parents = []
# Prune parents (due to pruning of empty commits) if relevant
parents, new_1st_parent = self.trim_extra_parents(orig_parents, parents)
# Get the list of file changes # Get the list of file changes
file_changes = [] file_changes = []
file_change = self._parse_optional_filechange() file_change = self._parse_optional_filechange()
@ -1437,8 +1415,7 @@ class FastExportFilter(object):
committer_name, committer_email, committer_date, committer_name, committer_email, committer_date,
commit_msg, commit_msg,
file_changes, file_changes,
from_commit, parents,
merge_commits,
original_id) original_id)
# If fast-export text had a mark for this commit, need to make sure this # If fast-export text had a mark for this commit, need to make sure this
@ -1448,11 +1425,11 @@ class FastExportFilter(object):
_IDS.record_rename(id_, commit.id) _IDS.record_rename(id_, commit.id)
# Record ancestry graph # Record ancestry graph
external_parents = [p for p in commit.get_parents() external_parents = [p for p in commit.parents
if not isinstance(p, int)] if not isinstance(p, int)]
self._graph.record_external_commits(external_parents) self._graph.record_external_commits(external_parents)
self._orig_graph.record_external_commits(external_parents) self._orig_graph.record_external_commits(external_parents)
self._graph.add_commit_and_parents(commit.id, commit.get_parents()) self._graph.add_commit_and_parents(commit.id, commit.parents)
self._orig_graph.add_commit_and_parents(id_, orig_parents) self._orig_graph.add_commit_and_parents(id_, orig_parents)
# Record the original list of file changes relative to first parent # Record the original list of file changes relative to first parent
@ -1464,10 +1441,6 @@ class FastExportFilter(object):
if self._everything_callback: if self._everything_callback:
self._everything_callback(commit) self._everything_callback(commit)
# Sanity check that user callbacks didn't violate assumption on parents
if commit.merge_commits:
assert commit.from_commit is not None
# Find out which files were modified by the callbacks. Such paths could # Find out which files were modified by the callbacks. Such paths could
# lead to sebsequent commits being empty (e.g. if removed a line containing # lead to sebsequent commits being empty (e.g. if removed a line containing
# a password from every version of a file that had the password, and some # a password from every version of a file that had the password, and some

View File

@ -43,8 +43,7 @@ commit1 = Commit("refs/heads/master",
"Com M. Iter", "comm@iter.email", when_string, "Com M. Iter", "comm@iter.email", when_string,
"My first commit! Wooot!\n\nLonger description", "My first commit! Wooot!\n\nLonger description",
changes, changes,
from_commit = None, parents = [])
merge_commits = [])
commit1.dump(output) commit1.dump(output)
world = Blob("Hello\nHi") world = Blob("Hello\nHi")
@ -61,8 +60,7 @@ commit2 = Commit("refs/heads/master",
"Com M. Iter", "comm@iter.email", when_string, "Com M. Iter", "comm@iter.email", when_string,
"Make a symlink to world called planet, modify world", "Make a symlink to world called planet, modify world",
changes, changes,
from_commit = commit1.id, parents = [commit1.id])
merge_commits = [])
commit2.dump(output) commit2.dump(output)
script = Blob("#!/bin/sh\n\necho Hello") script = Blob("#!/bin/sh\n\necho Hello")
@ -75,8 +73,7 @@ commit3 = Commit("refs/heads/master",
"Com M. Iter", "comm@iter.email", when_string, "Com M. Iter", "comm@iter.email", when_string,
"Add runme script, remove bar", "Add runme script, remove bar",
changes, changes,
from_commit = commit2.id, parents = [commit2.id])
merge_commits = [])
commit3.dump(output) commit3.dump(output)
progress = Progress("Done with the master branch now...") progress = Progress("Done with the master branch now...")
@ -98,8 +95,7 @@ commit4 = Commit("refs/heads/devel",
"Com M. Iter", "comm@iter.email", when_string, "Com M. Iter", "comm@iter.email", when_string,
"Modify world", "Modify world",
changes, changes,
from_commit = commit1.id, parents = [commit1.id])
merge_commits = [])
commit4.dump(output) commit4.dump(output)
world = Blob("Hello\nHi\nGoodbye") world = Blob("Hello\nHi\nGoodbye")
@ -120,8 +116,7 @@ commit5 = Commit("refs/heads/devel",
"Com M. Iter", "comm@iter.email", when_string, "Com M. Iter", "comm@iter.email", when_string,
"Merge branch 'master'\n", "Merge branch 'master'\n",
changes, changes,
from_commit = commit4.id, parents = [commit4.id, commit3.id])
merge_commits = [commit3.id])
commit5.dump(output) commit5.dump(output)

View File

@ -39,9 +39,9 @@ class InterleaveRepositories:
# Splice in any extra commits needed # Splice in any extra commits needed
if prev_letter in self.commit_map: if prev_letter in self.commit_map:
new_commit = self.commit_map[prev_letter] new_commit = self.commit_map[prev_letter]
new_commit.from_commit = self.last_commit new_commit.parents = [self.last_commit] if self.last_commit else []
new_commit.dump(self.out._output) new_commit.dump(self.out._output)
commit.from_commit = new_commit.id commit.parents = [new_commit.id]
# Dump our commit now # Dump our commit now
commit.dump(self.out._output) commit.dump(self.out._output)