filter-repo: provide extra metadata to some callbacks

For other programs importing git-filter-repo as a library and passing a
blob, commit, tag, or reset callback to RepoFilter, pass a second
parameter to these functions with extra metadata they might find useful.
For simplicity of implementation, this technically changes the calling
signature of the --*-callback functions passed on the command line, but
we hide that behind a _do_not_use_this_variable parameter for now, leave
it undocumented, and encourage folks who want to use it to write an
actual python program that imports git-filter-repo.  In the future, we
may modify the --*-callback functions to not pass this extra parameter,
or if it is deemed sufficiently useful, then we'll rename the second
parameter and document it.

As already noted in our API compatibility caveat near the top of
git-filter-repo, I am not guaranteeing API backwards compatibility.
That especially applies to this metadata argument, other than the fact
that it'll be a dict mapping strings to some kind of value.  I might add
more keys, rename them, change the corresponding value, or even remove
keys that used to be part of metadata.

Signed-off-by: Elijah Newren <newren@gmail.com>
This commit is contained in:
Elijah Newren 2019-05-25 10:58:11 -07:00
parent c58e83ea49
commit 0b70b72150
8 changed files with 26 additions and 20 deletions

View File

@ -2454,7 +2454,7 @@ class RepoFilter(object):
def _handle_arg_callbacks(self):
def make_callback(argname, str):
exec('def callback({}):\n'.format(argname)+
exec('def callback({}, _do_not_use_this_var = None):\n'.format(argname)+
' '+'\n '.join(str.splitlines()), globals())
return callback #namespace['callback']
def handle(type):
@ -2828,6 +2828,12 @@ class RepoFilter(object):
if len(orig_parents) >= 2 and len(commit.parents) < 2:
self._commits_no_longer_merges.append((commit.original_id, new_id))
def callback_metadata(self, extra_items = None):
  """
  Build the metadata dict handed to user callbacks as their second argument.

  The returned dict always contains:
    'commit_rename_func'      -> self._get_rename
    'ancestry_graph'          -> self._graph
    'original_ancestry_graph' -> self._orig_graph

  extra_items, if given, is a mapping whose entries are merged in after
  the entries above, so on a key collision the value from extra_items
  wins.  None (the default) or an empty mapping adds nothing.

  A fresh dict is returned on every call, so callers and callbacks may
  modify it without affecting later invocations.
  """
  # Use None rather than a dict() default: a default dict is created once
  # at function definition time and shared by every call.
  metadata = {'commit_rename_func': self._get_rename,
              'ancestry_graph': self._graph,
              'original_ancestry_graph': self._orig_graph}
  if extra_items:
    metadata.update(extra_items)
  return metadata
def _tweak_blob(self, blob):
if self._args.replace_text:
for literal, replacement in self._args.replace_text['literals']:
@ -2836,7 +2842,7 @@ class RepoFilter(object):
blob.data = regex.sub(replacement, blob.data)
if self._blob_callback:
self._blob_callback(blob)
self._blob_callback(blob, self.callback_metadata())
def _tweak_commit(self, commit, aux_info):
def filename_matches(path_expression, pathname):
@ -2982,7 +2988,7 @@ class RepoFilter(object):
# Call the user-defined callback, if any
if self._commit_callback:
self._commit_callback(commit)
self._commit_callback(commit, self.callback_metadata(aux_info))
# Now print the resulting commit, or if prunable skip it
if not commit.dumped:
@ -3047,7 +3053,7 @@ class RepoFilter(object):
# Call general purpose tag callback
if self._tag_callback:
self._tag_callback(tag)
self._tag_callback(tag, self.callback_metadata())
def _tweak_reset(self, reset):
if self._args.tag_rename:
@ -3055,7 +3061,7 @@ class RepoFilter(object):
if self._refname_callback:
reset.ref = self._refname_callback(reset.ref)
if self._reset_callback:
self._reset_callback(reset)
self._reset_callback(reset, self.callback_metadata())
# Record we've seen this ref and don't need to force a manual update
# for it.
@ -3187,7 +3193,7 @@ class RepoFilter(object):
# Call any user callback to allow them to modify the reset
if self._reset_callback:
self._reset_callback(reset)
self._reset_callback(reset, self.callback_metadata())
# Now print the resulting reset
reset.dump(self._output)

View File

@ -11,7 +11,7 @@ import datetime
import git_filter_repo as fr
def change_up_them_commits(commit):
def change_up_them_commits(commit, metadata):
# Change the commit author
if commit.author_name == b"Copy N. Paste":
commit.author_name = b"Ima L. Oser"

View File

@ -14,12 +14,12 @@ not try to handle any such special cases.
import sys
import git_filter_repo as fr
def drop_file_by_contents(blob):
def drop_file_by_contents(blob, metadata):
bad_file_contents = b'The launch code is 1-2-3-4.'
if blob.data == bad_file_contents:
blob.skip()
def drop_files_by_name(commit):
def drop_files_by_name(commit, metadata):
new_file_changes = []
for change in commit.file_changes:
if not change.filename.endswith(b'.doc'):

View File

@ -24,12 +24,12 @@ def print_progress():
print("\rRewriting commits... %d/%d (%d objects)"
% (commit_count, total_commits, object_count), end='')
def my_blob_callback(blob):
def my_blob_callback(blob, metadata):
global object_count
object_count += 1
print_progress()
def my_commit_callback(commit):
def my_commit_callback(commit, metadata):
global commit_count
commit_count += 1
print_progress()

View File

@ -13,7 +13,7 @@ not try to handle any such special cases.
import git_filter_repo as fr
def my_commit_callback(commit):
def my_commit_callback(commit, metadata):
if commit.branch == b"refs/heads/master":
commit.branch = b"refs/heads/develop"

View File

@ -24,15 +24,15 @@ class InterleaveRepositories:
self.commit_map = {}
self.last_commit = None
def skip_reset(self, reset):
def skip_reset(self, reset, metadata):
reset.skip()
def hold_commit(self, commit):
def hold_commit(self, commit, metadata):
commit.skip(new_id = commit.id)
letter = re.match(b'Commit (.)', commit.message).group(1)
self.commit_map[letter] = commit
def weave_commit(self, commit):
def weave_commit(self, commit, metadata):
letter = re.match(b'Commit (.)', commit.message).group(1)
prev_letter = bytes([ord(letter)-1])
@ -65,14 +65,14 @@ class InterleaveRepositories:
i1args = fr.FilteringOptions.parse_args(['--source', self.repo1])
i1 = fr.RepoFilter(i1args,
reset_callback = lambda r: self.skip_reset(r),
commit_callback = lambda c: self.hold_commit(c))
reset_callback = self.skip_reset,
commit_callback = self.hold_commit)
i1.set_output(out)
i1.run()
i2args = fr.FilteringOptions.parse_args(['--source', self.repo2])
i2 = fr.RepoFilter(i2args,
commit_callback = lambda c: self.weave_commit(c))
commit_callback = self.weave_commit)
i2.set_output(out)
i2.run()

View File

@ -14,7 +14,7 @@ not try to handle any such special cases.
import re
import git_filter_repo as fr
def strip_cvs_keywords(blob):
def strip_cvs_keywords(blob, metadata):
# FIXME: Should first check if blob is a text file to avoid ruining
# binaries. Could use python.magic here, or just output blob.data to
# the unix 'file' command

View File

@ -106,7 +106,7 @@ stream = io.BytesIO(textwrap.dedent('''
'''[1:]).encode())
counts = collections.Counter()
def look_for_reset(obj):
def look_for_reset(obj, metadata):
print("Processing {}".format(obj))
counts[type(obj)] += 1
if type(obj) == fr.Reset: