# NOTE: recovered from a whitespace-flattened unified diff of
# `git-filter-repo` (diff --git a/git-filter-repo b/git-filter-repo,
# index 7160d1a..b959efc 100755, hunk "@@ -65,6 +65,65 @@").  The hunk's
# leading context was `class FixedTimeZone(tzinfo):` /
# `def dst(self, dt): return timedelta(0)`; the class below is what the
# hunk adds right after it.
class AncestryGraph(object):
    """
    A class that maintains a directed acyclic graph of commits for the
    purpose of determining if one commit is the ancestor of another.

    Commits are named by arbitrary hashable identifiers supplied by the
    caller; internally each one is mapped to a small integer, and the
    graph is stored in terms of those integers.
    """

    def __init__(self):
        # The integer most recently assigned to a commit (0 until the first
        # commit is recorded).
        self.cur_value = 0

        # A mapping from the external identifiers given to us to the simple
        # integers we use in self.graph
        self.value = {}

        # A mapping from a commit's integer to a tuple of
        # (depth, list-of-parents).  Values and keys in this graph are all
        # integers from the self.value dict.  The depth of a commit is one
        # more than the max depth of any of its parents (1 for a commit
        # without parents).
        self.graph = {}

    def add_commit_and_parents(self, commit, parents):
        """
        Record in graph that commit has the given parents.  parents _MUST_
        have been first recorded.  commit _MUST_ not have been recorded yet.

        parents may be any iterable of previously recorded identifiers
        (including a one-shot iterator); it is consumed exactly once.
        Violating either precondition trips an assertion before anything is
        modified.
        """
        # Materialize once: the parents are needed both for validation and
        # for translation, and a generator would be exhausted by the first
        # pass, silently recording the commit as parentless.
        parents = list(parents)
        assert all(p in self.value for p in parents), "unrecorded parent"
        assert commit not in self.value, "commit already recorded"

        # Get values for commit and parents
        self.cur_value += 1
        self.value[commit] = self.cur_value
        graph_parents = [self.value[x] for x in parents]

        # Determine depth for commit, then insert the info into the graph
        depth = 1
        if graph_parents:
            depth += max(self.graph[p][0] for p in graph_parents)
        self.graph[self.cur_value] = (depth, graph_parents)

    def is_ancestor(self, possible_ancestor, check):
        """
        Return whether possible_ancestor is an ancestor of check.

        A commit counts as an ancestor of itself.  Both identifiers must
        have been recorded already; otherwise the lookup raises KeyError.
        """
        a, b = self.value[possible_ancestor], self.value[check]
        a_depth = self.graph[a][0]

        # Iterative depth-first walk from check back through its parents.
        ancestors = [b]
        visited = set()
        while ancestors:
            ancestor = ancestors.pop()
            if ancestor in visited:
                continue
            visited.add(ancestor)
            if ancestor == a:
                return True
            depth, more_ancestors = self.graph[ancestor]
            # Every parent is strictly shallower than its child, so nothing
            # reachable from a commit at or below a's depth can be a itself.
            if depth <= a_depth:
                continue
            ancestors.extend(more_ancestors)
        return False


# (Trailing diff context of this hunk, continued on the next line of the
# file: `class _IDs(object):` and the opening of its docstring,
# "A class that maintains the 'name".)
domain' of all the 'marks' (short int @@ -579,6 +638,12 @@ class FastExportFilter(object): # to if the last (or even only) commit on that branch was pruned self._seen_refs = {} + # A tuple of (depth, list-of-ancestors). Commits and ancestors are + # identified by their id (their 'mark' in fast-export or fast-import + # speak). The depth of a commit is one more than the max depth of any + # of its ancestors. + self._graph = AncestryGraph() + # A handle to the input source for the fast-export data self._input = None @@ -801,7 +866,27 @@ class FastExportFilter(object): merge_ref = self._parse_optional_parent_ref('merge') was_merge = len(parents) > 1 + # Remove redundant parents (if both sides of history are empty commits, + # the most recent ancestor on both sides may be the same commit). parents = collections.OrderedDict.fromkeys(parents).keys() + + # Flatten unnecessary merges. (If one side of history is entirely + # empty commits that were pruned, we may end up attempting to + # merge a commit with its ancestor. Remove parents that are an + # ancestor of another parent.) + num_original_parents = len(parents) + if num_original_parents > 1: + to_remove = [] + for cur in xrange(num_original_parents): + for other in xrange(num_original_parents): + if cur != other and self._graph.is_ancestor(parents[cur], + parents[other]): + to_remove.append(cur) + for x in reversed(to_remove): + parents.pop(x) + + # Record our new parents after above pruning of parents representing + # pruned empty histories from_commit = parents[0] merge_commits = parents[1:] @@ -831,6 +916,9 @@ class FastExportFilter(object): commit.old_id = id_ _IDS.record_rename(id_, commit.id) + # Record ancestry graph + self._graph.add_commit_and_parents(commit.id, commit.get_parents()) + # Call any user callback to allow them to modify the commit if self._commit_callback: self._commit_callback(commit)