diff --git a/README.md b/README.md index 0964d02..d659d69 100644 --- a/README.md +++ b/README.md @@ -202,22 +202,22 @@ provide at least one of the last three traits as well: the pruning of parents and other ancestors can ultimately result in the loss of one or more parents. If a merge commit loses enough parents to become a non-merge commit and it has no file - changes, then it too can be pruned. Topology changes are also - possible if the entire non-first-parent history is pruned away; - rather than having that parent of the merge be rewritten to the - merge base, it may (depending on whether the merge also had file - changes of its own) instead make sense to just prune that parent. - (We do not want to prune away a first parent being rewritten to - the merge base since some projects prefer --no-ff merges, though - this could be made an option.) Finally, note that we originally - talked not about pruning empty commits, but about pruning commits - which become empty. Some projects intentionally create empty - commits for versioning or publishing reasons, and these should - not be removed. Instead, only commits which become empty should - be pruned. (As a special case, commits which started empty but - originally had a parent and which become a root commit due to the - pruning of other commits will also be considered to have "become - empty".) + changes, then it too can be pruned. Merge commits can also have + a topology that becomes degenerate: it could end up with the + merge_base serving as both parents (if all intervening commits + from the original repo were pruned), or it could end up with one + parent which is an ancestor of its other parent. In such cases, + if the merge has no file changes of its own, then the merge + commit can also be pruned. However, if the merge commit was + already degenerate in the original history, then it was probably + intentional and the merge commit will not be pruned. Finally, + note that we originally talked about pruning commits which become + empty, NOT about pruning empty commits. Some projects + intentionally create empty commits for versioning or publishing + reasons, and these should not be removed. Instead, only commits + which become empty should be pruned. (As a special case, commits + which started empty but whose parent was pruned away will also be + considered to have "become empty".) 1. [Speed] Filtering should be reasonably fast diff --git a/git-filter-repo b/git-filter-repo index 62afb78..96f42dd 100755 --- a/git-filter-repo +++ b/git-filter-repo @@ -806,6 +806,8 @@ class FastExportFilter(object): # speak). The depth of a commit is one more than the max depth of any # of its ancestors. self._graph = AncestryGraph() + # Another one, for ancestry of commits in the original repo + self._orig_graph = AncestryGraph() # A set of commit hash pairs (oldhash, newhash) which used to be merge # commits but due to filtering were turned into non-merge commits. @@ -1174,15 +1176,17 @@ class FastExportFilter(object): # 'None') # Remove all parents rewritten to None, and keep track of which parents # were rewritten to an ancestor. - tmp = zip(parents, [x in _SKIPPED_COMMITS for x in orig_parents]) + tmp = zip(parents, + orig_parents, + [x in _SKIPPED_COMMITS for x in orig_parents]) tmp2 = [x for x in tmp if x[0] is not None] - parents, is_rewritten = [list(x) for x in zip(*tmp2)] if tmp2 else ([], []) - - # However, the way fast-export/fast-import split parents into from_commit - # and merge_commits means we'd rather a parentless commit be represented - # as a list containing a single None entry. - if not parents: - parents.append(None) + if not tmp2: + # All ancestors have been pruned; we have no parents. Note that the + # way fast-export/fast-import split parents into from_commit and + # merge_commits means we'd rather a parentless commit be represented + # as a list containing a single None entry. + return [None], None + parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)] # We can't have redundant parents if we don't have at least 2 parents if len(parents) < 2: @@ -1197,9 +1201,9 @@ class FastExportFilter(object): # Deleting duplicate rewritten parents means keeping parents if either # they have not been seen or they are ones that have not been rewritten. parents_copy = parents - pairs = [[p, is_rewritten[i]] for i, p in enumerate(parents) - if not (p in seen or seen_add(p)) or not is_rewritten[i]] - parents, is_rewritten = [list(x) for x in zip(*pairs)] + uniq = [[p, orig_parents[i], is_rewritten[i]] for i, p in enumerate(parents) + if not (p in seen or seen_add(p)) or not is_rewritten[i]] + parents, orig_parents, is_rewritten = [list(x) for x in zip(*uniq)] if len(parents) < 2: return parents_copy, parents[0] @@ -1213,10 +1217,21 @@ class FastExportFilter(object): if not is_rewritten[cur]: continue for other in xrange(num_parents): - if cur != other and self._graph.is_ancestor(parents[cur], - parents[other]): - to_remove.append(cur) - break # cur removed, so skip rest of others -- i.e. check cur+=1 + if cur == other: + continue + if not self._graph.is_ancestor(parents[cur], parents[other]): + continue + # parents[cur] is an ancestor of parents[other], so parents[cur] + # seems redundant. However, if it was intentionally redundant + # (e.g. a no-ff merge) in the original, then we want to keep it. + if self._orig_graph.is_ancestor(orig_parents[cur], + orig_parents[other]): + continue + # Okay so the cur-th parent is an ancestor of the other-th parent, + # and it wasn't that way in the original repository; mark the + # cur-th parent as removable. + to_remove.append(cur) + break # cur removed, so skip rest of others -- i.e. check cur+=1 for x in reversed(to_remove): parents.pop(x) if len(parents) < 2: @@ -1237,8 +1252,6 @@ class FastExportFilter(object): if len(orig_parents) < 2: # Special logic for commits that started empty... if not had_file_changes: - if orig_parents == [None]: - orig_parents = [] had_parents_pruned = (len(parents) < len(orig_parents) or (len(orig_parents) == 1 and orig_parents[0] in _SKIPPED_COMMITS)) @@ -1376,6 +1389,8 @@ class FastExportFilter(object): parents, new_1st_parent = self.trim_extra_parents(orig_parents, parents) from_commit = parents[0] merge_commits = parents[1:] + if orig_parents == [None]: + orig_parents = [] # Get the list of file changes file_changes = [] @@ -1407,6 +1422,7 @@ class FastExportFilter(object): # Record ancestry graph self._graph.add_commit_and_parents(commit.id, commit.get_parents()) + self._orig_graph.add_commit_and_parents(id_, orig_parents) # Record the original list of file changes relative to first parent orig_file_changes = set(commit.file_changes)