filter-repo: allow users to adjust pruning of empty & degenerate commits

We have a good default for pruning of empty commits and degenerate merge
commits: only pruning such commits that didn't start out that way (i.e.
that couldn't intentionally have been empty or degenerate).  However,
users may have reasons to want to aggressively prune such commits (maybe
they used BFG repo filter or filter-branch previously and have lots of
cruft commits that they want removed), and we may as well allow them to
specify that they don't want pruning too, just to be flexible.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-05-09 09:28:02 -07:00
parent 3961a82ba4
commit 2c8f763426
4 changed files with 181 additions and 3 deletions

View File

@ -832,6 +832,8 @@ class FastExportFilter(object):
"""
def __init__(self, repo_working_dir,
empty_pruning = 'auto',
degenerate_pruning = 'auto',
tag_callback = None, commit_callback = None,
blob_callback = None, progress_callback = None,
reset_callback = None, checkpoint_callback = None,
@ -928,6 +930,12 @@ class FastExportFilter(object):
# to subsequent commits being empty
self._files_tweaked = set()
# Whether to do empty or degenerate pruning
assert(empty_pruning in ['always', 'auto', 'never'])
self._empty_pruning = empty_pruning
assert(degenerate_pruning in ['always', 'auto', 'never'])
self._degenerate_pruning = degenerate_pruning
# Compile some regexes and cache those
self._mark_re = re.compile(br'mark :(\d+)\n$')
self._parent_regexes = {}
@ -1231,6 +1239,10 @@ class FastExportFilter(object):
Returns a tuple:
(parents, new_first_parent_if_would_become_non_merge)'''
if self._degenerate_pruning == 'never':
return parents, None
always_prune = (self._degenerate_pruning == 'always')
# Pruning of empty commits means multiple things:
# * An original parent of this commit may have been pruned causing the
# need to rewrite the reported parent to the nearest ancestor. We
@ -1242,7 +1254,7 @@ class FastExportFilter(object):
# were rewritten to an ancestor.
tmp = zip(parents,
orig_parents,
[x in _SKIPPED_COMMITS for x in orig_parents])
[(x in _SKIPPED_COMMITS or always_prune) for x in orig_parents])
tmp2 = [x for x in tmp if x[0] is not None]
if not tmp2:
# All ancestors have been pruned; we have no parents.
@ -1285,7 +1297,8 @@ class FastExportFilter(object):
# parents[cur] is an ancestor of parents[other], so parents[cur]
# seems redundant. However, if it was intentionally redundant
# (e.g. a no-ff merge) in the original, then we want to keep it.
if self._orig_graph.is_ancestor(orig_parents[cur],
if not always_prune and \
self._orig_graph.is_ancestor(orig_parents[cur],
orig_parents[other]):
continue
# Okay so the cur-th parent is an ancestor of the other-th parent,
@ -1303,6 +1316,10 @@ class FastExportFilter(object):
def prunable(self, commit, new_1st_parent, had_file_changes, orig_parents):
parents = commit.parents
if self._empty_pruning == 'never':
return False
always_prune = (self._empty_pruning == 'always')
# For merge commits, unless there are prunable (redundant) parents, we
# do not want to prune
if len(parents) >= 2 and not new_1st_parent:
@ -1310,7 +1327,7 @@ class FastExportFilter(object):
if len(parents) < 2:
# Special logic for commits that started empty...
if not had_file_changes:
if not had_file_changes and not always_prune:
had_parents_pruned = (len(parents) < len(orig_parents) or
(len(orig_parents) == 1 and
orig_parents[0] in _SKIPPED_COMMITS))
@ -1998,6 +2015,25 @@ class FilteringOptions(object):
"choose a replacement choice other than the default of "
"'***REMOVED***'. "))
parents = parser.add_argument_group(title=_("Parent rewriting"))
parents.add_argument('--empty-pruning', default='auto',
choices=['always', 'auto', 'never'],
help=_("Whether to prune empty commits. 'auto' (the default) means "
"only prune commits which become empty (not commits which were "
"empty in the original repo, unless their parent was pruned). "
"When the parent of a commit is pruned, the first non-pruned "
"ancestor becomes the new parent."))
parents.add_argument('--degenerate-pruning', default='auto',
choices=['always', 'auto', 'never'],
help=_("Since merge commits are needed for history topology, they "
"are typically exempt from pruning. However, they can become "
"degenerate with the pruning of other commits (having fewer "
"than two parents, having one commit serve as both parents, or "
"having one parent as the ancestor of the other.) If such "
"merge commits have no file changes, they can be pruned. The "
"default ('auto') is to only prune empty merge commits which "
"become degenerate (not which started as such)."))
callback = parser.add_argument_group(title=_("Generic callback code snippets"))
callback.add_argument('--filename-callback', metavar="FUNCTION_BODY",
help=_("Python code body for processing filenames; see CALLBACKS "
@ -3117,6 +3153,8 @@ class RepoFilter(object):
# Create and run the filter
fef = FastExportFilter(self._args.source or '.',
empty_pruning = self._args.empty_pruning,
degenerate_pruning = self._args.degenerate_pruning,
blob_callback = actual_blob_callback,
commit_callback = actual_commit_callback,
tag_callback = actual_tag_callback,

View File

@ -39,6 +39,10 @@ filter_testcase basic basic-numbers --invert-paths --path-regex 'f.*e.*e'
filter_testcase basic basic-mailmap --mailmap ../t9390/sample-mailmap
filter_testcase basic basic-replace --replace-text ../t9390/sample-replace
filter_testcase empty empty-keepme --path keepme
filter_testcase empty more-empty-keepme --path keepme --empty-pruning=always \
--degenerate-pruning=always
filter_testcase empty less-empty-keepme --path keepme --empty-pruning=never \
--degenerate-pruning=never
filter_testcase degenerate degenerate-keepme --path moduleA/keepme
filter_testcase degenerate degenerate-moduleA --path moduleA
filter_testcase degenerate degenerate-globme --path-glob *me

97
t/t9390/less-empty-keepme Normal file
View File

@ -0,0 +1,97 @@
feature done
reset refs/heads/master
commit refs/heads/master
mark :1
author Full Name <user@organization.tld> 1000020000 +0100
committer Full Name <user@organization.tld> 1000020000 +0100
data 2
C
commit refs/heads/master
mark :2
author Full Name <user@organization.tld> 1000030000 +0100
committer Full Name <user@organization.tld> 1000030000 +0100
data 2
D
from :1
reset refs/heads/master
commit refs/heads/master
mark :3
author Full Name <user@organization.tld> 1000000000 +0100
committer Full Name <user@organization.tld> 1000000000 +0100
data 2
A
commit refs/heads/master
mark :4
author Full Name <user@organization.tld> 1000010000 +0100
committer Full Name <user@organization.tld> 1000010000 +0100
data 2
B
from :3
blob
mark :5
data 10
keepme v1
commit refs/heads/master
mark :6
author Full Name <user@organization.tld> 1000040000 +0100
committer Full Name <user@organization.tld> 1000040000 +0100
data 29
E: Merge commit 'D' into 'B'
from :4
merge :2
M 100644 :5 keepme
commit refs/heads/master
mark :7
author Full Name <user@organization.tld> 1000060000 +0100
committer Full Name <user@organization.tld> 1000060000 +0100
data 2
G
from :6
commit refs/heads/master
mark :8
author Full Name <user@organization.tld> 1000070000 +0100
committer Full Name <user@organization.tld> 1000070000 +0100
data 2
H
from :7
commit refs/heads/master
mark :9
author Full Name <user@organization.tld> 1000050000 +0100
committer Full Name <user@organization.tld> 1000050000 +0100
data 29
F: Merge commit 'D' into 'B'
from :4
merge :2
blob
mark :10
data 10
keepme v2
commit refs/heads/master
mark :11
author Full Name <user@organization.tld> 1000080000 +0100
committer Full Name <user@organization.tld> 1000080000 +0100
data 2
I
from :9
M 100644 :10 keepme
commit refs/heads/master
mark :12
author Full Name <user@organization.tld> 1000090000 +0100
committer Full Name <user@organization.tld> 1000090000 +0100
data 29
J: Merge commit 'I' into 'H'
from :8
merge :11
done

39
t/t9390/more-empty-keepme Normal file
View File

@ -0,0 +1,39 @@
feature done
blob
mark :1
data 10
keepme v2
reset refs/heads/master
commit refs/heads/master
mark :2
author Full Name <user@organization.tld> 1000080000 +0100
committer Full Name <user@organization.tld> 1000080000 +0100
data 2
I
M 100644 :1 keepme
blob
mark :3
data 10
keepme v1
reset refs/heads/master
commit refs/heads/master
mark :4
author Full Name <user@organization.tld> 1000040000 +0100
committer Full Name <user@organization.tld> 1000040000 +0100
data 29
E: Merge commit 'D' into 'B'
M 100644 :3 keepme
commit refs/heads/master
mark :5
author Full Name <user@organization.tld> 1000090000 +0100
committer Full Name <user@organization.tld> 1000090000 +0100
data 29
J: Merge commit 'I' into 'H'
from :4
merge :2
done